intel_bufmgr_gem.c revision 515cea6ac67eb458c59fececc3c67411ee6fd3c3
1/************************************************************************** 2 * 3 * Copyright � 2007 Red Hat Inc. 4 * Copyright � 2007 Intel Corporation 5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sub license, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * The above copyright notice and this permission notice (including the 25 * next paragraph) shall be included in all copies or substantial portions 26 * of the Software. 27 * 28 * 29 **************************************************************************/ 30/* 31 * Authors: Thomas Hellstr�m <thomas-at-tungstengraphics-dot-com> 32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com> 33 * Eric Anholt <eric@anholt.net> 34 * Dave Airlie <airlied@linux.ie> 35 */ 36 37#ifdef HAVE_CONFIG_H 38#include "config.h" 39#endif 40 41#include <xf86drm.h> 42#include <xf86atomic.h> 43#include <fcntl.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48#include <assert.h> 49#include <pthread.h> 50#include <sys/ioctl.h> 51#include <sys/mman.h> 52#include <sys/stat.h> 53#include <sys/types.h> 54#include <stdbool.h> 55 56#include "errno.h" 57#include "libdrm_lists.h" 58#include "intel_bufmgr.h" 59#include "intel_bufmgr_priv.h" 60#include "intel_chipset.h" 61#include "string.h" 62 63#include "i915_drm.h" 64 65#define DBG(...) do { \ 66 if (bufmgr_gem->bufmgr.debug) \ 67 fprintf(stderr, __VA_ARGS__); \ 68} while (0) 69 70#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 71 72typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 73 74struct drm_intel_gem_bo_bucket { 75 drmMMListHead head; 76 unsigned long size; 77}; 78 79typedef struct _drm_intel_bufmgr_gem { 80 drm_intel_bufmgr bufmgr; 81 82 int fd; 83 84 int max_relocs; 85 86 pthread_mutex_t lock; 87 88 struct drm_i915_gem_exec_object *exec_objects; 89 struct drm_i915_gem_exec_object2 *exec2_objects; 90 drm_intel_bo **exec_bos; 91 int exec_size; 92 int exec_count; 93 94 /** Array of lists of cached gem objects of power-of-two sizes */ 95 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 96 int num_buckets; 97 time_t time; 98 99 drmMMListHead named; 100 101 uint64_t gtt_size; 102 int available_fences; 103 int pci_device; 104 int gen; 105 unsigned int has_bsd : 1; 106 unsigned int has_blt : 1; 107 unsigned int has_relaxed_fencing : 1; 108 unsigned int bo_reuse : 1; 109 bool fenced_relocs; 110} drm_intel_bufmgr_gem; 111 112#define DRM_INTEL_RELOC_FENCE (1<<0) 113 114typedef struct _drm_intel_reloc_target_info { 115 drm_intel_bo *bo; 116 int flags; 117} drm_intel_reloc_target; 118 119struct _drm_intel_bo_gem { 120 drm_intel_bo bo; 121 122 atomic_t refcount; 123 uint32_t gem_handle; 124 const char *name; 125 126 /** 127 * Kenel-assigned global name for this object 128 */ 129 unsigned int global_name; 130 drmMMListHead name_list; 131 132 /** 133 * Index of the buffer within the validation list while preparing a 134 * batchbuffer execution. 135 */ 136 int validate_index; 137 138 /** 139 * Current tiling mode 140 */ 141 uint32_t tiling_mode; 142 uint32_t swizzle_mode; 143 unsigned long stride; 144 145 time_t free_time; 146 147 /** Array passed to the DRM containing relocation information. */ 148 struct drm_i915_gem_relocation_entry *relocs; 149 /** 150 * Array of info structs corresponding to relocs[i].target_handle etc 151 */ 152 drm_intel_reloc_target *reloc_target_info; 153 /** Number of entries in relocs */ 154 int reloc_count; 155 /** Mapped address for the buffer, saved across map/unmap cycles */ 156 void *mem_virtual; 157 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 158 void *gtt_virtual; 159 160 /** BO cache list */ 161 drmMMListHead head; 162 163 /** 164 * Boolean of whether this BO and its children have been included in 165 * the current drm_intel_bufmgr_check_aperture_space() total. 166 */ 167 bool included_in_check_aperture; 168 169 /** 170 * Boolean of whether this buffer has been used as a relocation 171 * target and had its size accounted for, and thus can't have any 172 * further relocations added to it. 173 */ 174 bool used_as_reloc_target; 175 176 /** 177 * Boolean of whether we have encountered an error whilst building the relocation tree. 178 */ 179 bool has_error; 180 181 /** 182 * Boolean of whether this buffer can be re-used 183 */ 184 bool reusable; 185 186 /** 187 * Size in bytes of this buffer and its relocation descendents. 188 * 189 * Used to avoid costly tree walking in 190 * drm_intel_bufmgr_check_aperture in the common case. 191 */ 192 int reloc_tree_size; 193 194 /** 195 * Number of potential fence registers required by this buffer and its 196 * relocations. 197 */ 198 int reloc_tree_fences; 199}; 200 201static unsigned int 202drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 203 204static unsigned int 205drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 206 207static int 208drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 209 uint32_t * swizzle_mode); 210 211static int 212drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 213 uint32_t tiling_mode, 214 uint32_t stride); 215 216static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 217 time_t time); 218 219static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 220 221static void drm_intel_gem_bo_free(drm_intel_bo *bo); 222 223static unsigned long 224drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 225 uint32_t *tiling_mode) 226{ 227 unsigned long min_size, max_size; 228 unsigned long i; 229 230 if (*tiling_mode == I915_TILING_NONE) 231 return size; 232 233 /* 965+ just need multiples of page size for tiling */ 234 if (bufmgr_gem->gen >= 4) 235 return ROUND_UP_TO(size, 4096); 236 237 /* Older chips need powers of two, of at least 512k or 1M */ 238 if (bufmgr_gem->gen == 3) { 239 min_size = 1024*1024; 240 max_size = 128*1024*1024; 241 } else { 242 min_size = 512*1024; 243 max_size = 64*1024*1024; 244 } 245 246 if (size > max_size) { 247 *tiling_mode = I915_TILING_NONE; 248 return size; 249 } 250 251 /* Do we need to allocate every page for the fence? */ 252 if (bufmgr_gem->has_relaxed_fencing) 253 return ROUND_UP_TO(size, 4096); 254 255 for (i = min_size; i < size; i <<= 1) 256 ; 257 258 return i; 259} 260 261/* 262 * Round a given pitch up to the minimum required for X tiling on a 263 * given chip. We use 512 as the minimum to allow for a later tiling 264 * change. 265 */ 266static unsigned long 267drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 268 unsigned long pitch, uint32_t *tiling_mode) 269{ 270 unsigned long tile_width; 271 unsigned long i; 272 273 /* If untiled, then just align it so that we can do rendering 274 * to it with the 3D engine. 275 */ 276 if (*tiling_mode == I915_TILING_NONE) 277 return ALIGN(pitch, 64); 278 279 if (*tiling_mode == I915_TILING_X 280 || (IS_915(bufmgr_gem) && *tiling_mode == I915_TILING_Y)) 281 tile_width = 512; 282 else 283 tile_width = 128; 284 285 /* 965 is flexible */ 286 if (bufmgr_gem->gen >= 4) 287 return ROUND_UP_TO(pitch, tile_width); 288 289 /* The older hardware has a maximum pitch of 8192 with tiled 290 * surfaces, so fallback to untiled if it's too large. 291 */ 292 if (pitch > 8192) { 293 *tiling_mode = I915_TILING_NONE; 294 return ALIGN(pitch, 64); 295 } 296 297 /* Pre-965 needs power of two tile width */ 298 for (i = tile_width; i < pitch; i <<= 1) 299 ; 300 301 return i; 302} 303 304static struct drm_intel_gem_bo_bucket * 305drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 306 unsigned long size) 307{ 308 int i; 309 310 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 311 struct drm_intel_gem_bo_bucket *bucket = 312 &bufmgr_gem->cache_bucket[i]; 313 if (bucket->size >= size) { 314 return bucket; 315 } 316 } 317 318 return NULL; 319} 320 321static void 322drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 323{ 324 int i, j; 325 326 for (i = 0; i < bufmgr_gem->exec_count; i++) { 327 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 328 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 329 330 if (bo_gem->relocs == NULL) { 331 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 332 bo_gem->name); 333 continue; 334 } 335 336 for (j = 0; j < bo_gem->reloc_count; j++) { 337 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 338 drm_intel_bo_gem *target_gem = 339 (drm_intel_bo_gem *) target_bo; 340 341 DBG("%2d: %d (%s)@0x%08llx -> " 342 "%d (%s)@0x%08lx + 0x%08x\n", 343 i, 344 bo_gem->gem_handle, bo_gem->name, 345 (unsigned long long)bo_gem->relocs[j].offset, 346 target_gem->gem_handle, 347 target_gem->name, 348 target_bo->offset, 349 bo_gem->relocs[j].delta); 350 } 351 } 352} 353 354static inline void 355drm_intel_gem_bo_reference(drm_intel_bo *bo) 356{ 357 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 358 359 atomic_inc(&bo_gem->refcount); 360} 361 362/** 363 * Adds the given buffer to the list of buffers to be validated (moved into the 364 * appropriate memory type) with the next batch submission. 365 * 366 * If a buffer is validated multiple times in a batch submission, it ends up 367 * with the intersection of the memory type flags and the union of the 368 * access flags. 369 */ 370static void 371drm_intel_add_validate_buffer(drm_intel_bo *bo) 372{ 373 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 374 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 375 int index; 376 377 if (bo_gem->validate_index != -1) 378 return; 379 380 /* Extend the array of validation entries as necessary. */ 381 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 382 int new_size = bufmgr_gem->exec_size * 2; 383 384 if (new_size == 0) 385 new_size = 5; 386 387 bufmgr_gem->exec_objects = 388 realloc(bufmgr_gem->exec_objects, 389 sizeof(*bufmgr_gem->exec_objects) * new_size); 390 bufmgr_gem->exec_bos = 391 realloc(bufmgr_gem->exec_bos, 392 sizeof(*bufmgr_gem->exec_bos) * new_size); 393 bufmgr_gem->exec_size = new_size; 394 } 395 396 index = bufmgr_gem->exec_count; 397 bo_gem->validate_index = index; 398 /* Fill in array entry */ 399 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 400 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 401 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 402 bufmgr_gem->exec_objects[index].alignment = 0; 403 bufmgr_gem->exec_objects[index].offset = 0; 404 bufmgr_gem->exec_bos[index] = bo; 405 bufmgr_gem->exec_count++; 406} 407 408static void 409drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 410{ 411 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 412 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 413 int index; 414 415 if (bo_gem->validate_index != -1) { 416 if (need_fence) 417 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 418 EXEC_OBJECT_NEEDS_FENCE; 419 return; 420 } 421 422 /* Extend the array of validation entries as necessary. */ 423 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 424 int new_size = bufmgr_gem->exec_size * 2; 425 426 if (new_size == 0) 427 new_size = 5; 428 429 bufmgr_gem->exec2_objects = 430 realloc(bufmgr_gem->exec2_objects, 431 sizeof(*bufmgr_gem->exec2_objects) * new_size); 432 bufmgr_gem->exec_bos = 433 realloc(bufmgr_gem->exec_bos, 434 sizeof(*bufmgr_gem->exec_bos) * new_size); 435 bufmgr_gem->exec_size = new_size; 436 } 437 438 index = bufmgr_gem->exec_count; 439 bo_gem->validate_index = index; 440 /* Fill in array entry */ 441 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 442 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 443 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 444 bufmgr_gem->exec2_objects[index].alignment = 0; 445 bufmgr_gem->exec2_objects[index].offset = 0; 446 bufmgr_gem->exec_bos[index] = bo; 447 bufmgr_gem->exec2_objects[index].flags = 0; 448 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 449 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 450 if (need_fence) { 451 bufmgr_gem->exec2_objects[index].flags |= 452 EXEC_OBJECT_NEEDS_FENCE; 453 } 454 bufmgr_gem->exec_count++; 455} 456 457#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 458 sizeof(uint32_t)) 459 460static void 461drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 462 drm_intel_bo_gem *bo_gem) 463{ 464 int size; 465 466 assert(!bo_gem->used_as_reloc_target); 467 468 /* The older chipsets are far-less flexible in terms of tiling, 469 * and require tiled buffer to be size aligned in the aperture. 470 * This means that in the worst possible case we will need a hole 471 * twice as large as the object in order for it to fit into the 472 * aperture. Optimal packing is for wimps. 473 */ 474 size = bo_gem->bo.size; 475 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 476 int min_size; 477 478 if (bufmgr_gem->has_relaxed_fencing) { 479 if (bufmgr_gem->gen == 3) 480 min_size = 1024*1024; 481 else 482 min_size = 512*1024; 483 484 while (min_size < size) 485 min_size *= 2; 486 } else 487 min_size = size; 488 489 /* Account for worst-case alignment. */ 490 size = 2 * min_size; 491 } 492 493 bo_gem->reloc_tree_size = size; 494} 495 496static int 497drm_intel_setup_reloc_list(drm_intel_bo *bo) 498{ 499 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 500 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 501 unsigned int max_relocs = bufmgr_gem->max_relocs; 502 503 if (bo->size / 4 < max_relocs) 504 max_relocs = bo->size / 4; 505 506 bo_gem->relocs = malloc(max_relocs * 507 sizeof(struct drm_i915_gem_relocation_entry)); 508 bo_gem->reloc_target_info = malloc(max_relocs * 509 sizeof(drm_intel_reloc_target)); 510 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 511 bo_gem->has_error = true; 512 513 free (bo_gem->relocs); 514 bo_gem->relocs = NULL; 515 516 free (bo_gem->reloc_target_info); 517 bo_gem->reloc_target_info = NULL; 518 519 return 1; 520 } 521 522 return 0; 523} 524 525static int 526drm_intel_gem_bo_busy(drm_intel_bo *bo) 527{ 528 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 529 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 530 struct drm_i915_gem_busy busy; 531 int ret; 532 533 memset(&busy, 0, sizeof(busy)); 534 busy.handle = bo_gem->gem_handle; 535 536 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 537 538 return (ret == 0 && busy.busy); 539} 540 541static int 542drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 543 drm_intel_bo_gem *bo_gem, int state) 544{ 545 struct drm_i915_gem_madvise madv; 546 547 madv.handle = bo_gem->gem_handle; 548 madv.madv = state; 549 madv.retained = 1; 550 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 551 552 return madv.retained; 553} 554 555static int 556drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 557{ 558 return drm_intel_gem_bo_madvise_internal 559 ((drm_intel_bufmgr_gem *) bo->bufmgr, 560 (drm_intel_bo_gem *) bo, 561 madv); 562} 563 564/* drop the oldest entries that have been purged by the kernel */ 565static void 566drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 567 struct drm_intel_gem_bo_bucket *bucket) 568{ 569 while (!DRMLISTEMPTY(&bucket->head)) { 570 drm_intel_bo_gem *bo_gem; 571 572 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 573 bucket->head.next, head); 574 if (drm_intel_gem_bo_madvise_internal 575 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 576 break; 577 578 DRMLISTDEL(&bo_gem->head); 579 drm_intel_gem_bo_free(&bo_gem->bo); 580 } 581} 582 583static drm_intel_bo * 584drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 585 const char *name, 586 unsigned long size, 587 unsigned long flags, 588 uint32_t tiling_mode, 589 unsigned long stride) 590{ 591 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 592 drm_intel_bo_gem *bo_gem; 593 unsigned int page_size = getpagesize(); 594 int ret; 595 struct drm_intel_gem_bo_bucket *bucket; 596 bool alloc_from_cache; 597 unsigned long bo_size; 598 bool for_render = false; 599 600 if (flags & BO_ALLOC_FOR_RENDER) 601 for_render = true; 602 603 /* Round the allocated size up to a power of two number of pages. */ 604 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 605 606 /* If we don't have caching at this size, don't actually round the 607 * allocation up. 608 */ 609 if (bucket == NULL) { 610 bo_size = size; 611 if (bo_size < page_size) 612 bo_size = page_size; 613 } else { 614 bo_size = bucket->size; 615 } 616 617 pthread_mutex_lock(&bufmgr_gem->lock); 618 /* Get a buffer out of the cache if available */ 619retry: 620 alloc_from_cache = false; 621 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 622 if (for_render) { 623 /* Allocate new render-target BOs from the tail (MRU) 624 * of the list, as it will likely be hot in the GPU 625 * cache and in the aperture for us. 626 */ 627 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 628 bucket->head.prev, head); 629 DRMLISTDEL(&bo_gem->head); 630 alloc_from_cache = true; 631 } else { 632 /* For non-render-target BOs (where we're probably 633 * going to map it first thing in order to fill it 634 * with data), check if the last BO in the cache is 635 * unbusy, and only reuse in that case. Otherwise, 636 * allocating a new buffer is probably faster than 637 * waiting for the GPU to finish. 638 */ 639 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 640 bucket->head.next, head); 641 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 642 alloc_from_cache = true; 643 DRMLISTDEL(&bo_gem->head); 644 } 645 } 646 647 if (alloc_from_cache) { 648 if (!drm_intel_gem_bo_madvise_internal 649 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 650 drm_intel_gem_bo_free(&bo_gem->bo); 651 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 652 bucket); 653 goto retry; 654 } 655 656 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 657 tiling_mode, 658 stride)) { 659 drm_intel_gem_bo_free(&bo_gem->bo); 660 goto retry; 661 } 662 } 663 } 664 pthread_mutex_unlock(&bufmgr_gem->lock); 665 666 if (!alloc_from_cache) { 667 struct drm_i915_gem_create create; 668 669 bo_gem = calloc(1, sizeof(*bo_gem)); 670 if (!bo_gem) 671 return NULL; 672 673 bo_gem->bo.size = bo_size; 674 memset(&create, 0, sizeof(create)); 675 create.size = bo_size; 676 677 ret = drmIoctl(bufmgr_gem->fd, 678 DRM_IOCTL_I915_GEM_CREATE, 679 &create); 680 bo_gem->gem_handle = create.handle; 681 bo_gem->bo.handle = bo_gem->gem_handle; 682 if (ret != 0) { 683 free(bo_gem); 684 return NULL; 685 } 686 bo_gem->bo.bufmgr = bufmgr; 687 688 bo_gem->tiling_mode = I915_TILING_NONE; 689 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 690 bo_gem->stride = 0; 691 692 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 693 tiling_mode, 694 stride)) { 695 drm_intel_gem_bo_free(&bo_gem->bo); 696 return NULL; 697 } 698 699 DRMINITLISTHEAD(&bo_gem->name_list); 700 } 701 702 bo_gem->name = name; 703 atomic_set(&bo_gem->refcount, 1); 704 bo_gem->validate_index = -1; 705 bo_gem->reloc_tree_fences = 0; 706 bo_gem->used_as_reloc_target = false; 707 bo_gem->has_error = false; 708 bo_gem->reusable = true; 709 710 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 711 712 DBG("bo_create: buf %d (%s) %ldb\n", 713 bo_gem->gem_handle, bo_gem->name, size); 714 715 return &bo_gem->bo; 716} 717 718static drm_intel_bo * 719drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 720 const char *name, 721 unsigned long size, 722 unsigned int alignment) 723{ 724 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 725 BO_ALLOC_FOR_RENDER, 726 I915_TILING_NONE, 0); 727} 728 729static drm_intel_bo * 730drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 731 const char *name, 732 unsigned long size, 733 unsigned int alignment) 734{ 735 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 736 I915_TILING_NONE, 0); 737} 738 739static drm_intel_bo * 740drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 741 int x, int y, int cpp, uint32_t *tiling_mode, 742 unsigned long *pitch, unsigned long flags) 743{ 744 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 745 unsigned long size, stride; 746 uint32_t tiling; 747 748 do { 749 unsigned long aligned_y, height_alignment; 750 751 tiling = *tiling_mode; 752 753 /* If we're tiled, our allocations are in 8 or 32-row blocks, 754 * so failure to align our height means that we won't allocate 755 * enough pages. 756 * 757 * If we're untiled, we still have to align to 2 rows high 758 * because the data port accesses 2x2 blocks even if the 759 * bottom row isn't to be rendered, so failure to align means 760 * we could walk off the end of the GTT and fault. This is 761 * documented on 965, and may be the case on older chipsets 762 * too so we try to be careful. 763 */ 764 aligned_y = y; 765 height_alignment = 2; 766 767 if (IS_GEN2(bufmgr_gem) && tiling != I915_TILING_NONE) 768 height_alignment = 16; 769 else if (tiling == I915_TILING_X 770 || (IS_915(bufmgr_gem) && tiling == I915_TILING_Y)) 771 height_alignment = 8; 772 else if (tiling == I915_TILING_Y) 773 height_alignment = 32; 774 aligned_y = ALIGN(y, height_alignment); 775 776 stride = x * cpp; 777 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 778 size = stride * aligned_y; 779 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 780 } while (*tiling_mode != tiling); 781 *pitch = stride; 782 783 if (tiling == I915_TILING_NONE) 784 stride = 0; 785 786 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 787 tiling, stride); 788} 789 790/** 791 * Returns a drm_intel_bo wrapping the given buffer object handle. 792 * 793 * This can be used when one application needs to pass a buffer object 794 * to another. 795 */ 796drm_intel_bo * 797drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 798 const char *name, 799 unsigned int handle) 800{ 801 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 802 drm_intel_bo_gem *bo_gem; 803 int ret; 804 struct drm_gem_open open_arg; 805 struct drm_i915_gem_get_tiling get_tiling; 806 drmMMListHead *list; 807 808 /* At the moment most applications only have a few named bo. 809 * For instance, in a DRI client only the render buffers passed 810 * between X and the client are named. And since X returns the 811 * alternating names for the front/back buffer a linear search 812 * provides a sufficiently fast match. 813 */ 814 for (list = bufmgr_gem->named.next; 815 list != &bufmgr_gem->named; 816 list = list->next) { 817 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 818 if (bo_gem->global_name == handle) { 819 drm_intel_gem_bo_reference(&bo_gem->bo); 820 return &bo_gem->bo; 821 } 822 } 823 824 bo_gem = calloc(1, sizeof(*bo_gem)); 825 if (!bo_gem) 826 return NULL; 827 828 memset(&open_arg, 0, sizeof(open_arg)); 829 open_arg.name = handle; 830 ret = drmIoctl(bufmgr_gem->fd, 831 DRM_IOCTL_GEM_OPEN, 832 &open_arg); 833 if (ret != 0) { 834 DBG("Couldn't reference %s handle 0x%08x: %s\n", 835 name, handle, strerror(errno)); 836 free(bo_gem); 837 return NULL; 838 } 839 bo_gem->bo.size = open_arg.size; 840 bo_gem->bo.offset = 0; 841 bo_gem->bo.virtual = NULL; 842 bo_gem->bo.bufmgr = bufmgr; 843 bo_gem->name = name; 844 atomic_set(&bo_gem->refcount, 1); 845 bo_gem->validate_index = -1; 846 bo_gem->gem_handle = open_arg.handle; 847 bo_gem->bo.handle = open_arg.handle; 848 bo_gem->global_name = handle; 849 bo_gem->reusable = false; 850 851 memset(&get_tiling, 0, sizeof(get_tiling)); 852 get_tiling.handle = bo_gem->gem_handle; 853 ret = drmIoctl(bufmgr_gem->fd, 854 DRM_IOCTL_I915_GEM_GET_TILING, 855 &get_tiling); 856 if (ret != 0) { 857 drm_intel_gem_bo_unreference(&bo_gem->bo); 858 return NULL; 859 } 860 bo_gem->tiling_mode = get_tiling.tiling_mode; 861 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 862 /* XXX stride is unknown */ 863 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 864 865 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 866 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 867 868 return &bo_gem->bo; 869} 870 871static void 872drm_intel_gem_bo_free(drm_intel_bo *bo) 873{ 874 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 875 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 876 struct drm_gem_close close; 877 int ret; 878 879 if (bo_gem->mem_virtual) 880 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 881 if (bo_gem->gtt_virtual) 882 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 883 884 /* Close this object */ 885 memset(&close, 0, sizeof(close)); 886 close.handle = bo_gem->gem_handle; 887 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 888 if (ret != 0) { 889 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 890 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 891 } 892 free(bo); 893} 894 895/** Frees all cached buffers significantly older than @time. */ 896static void 897drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 898{ 899 int i; 900 901 if (bufmgr_gem->time == time) 902 return; 903 904 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 905 struct drm_intel_gem_bo_bucket *bucket = 906 &bufmgr_gem->cache_bucket[i]; 907 908 while (!DRMLISTEMPTY(&bucket->head)) { 909 drm_intel_bo_gem *bo_gem; 910 911 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 912 bucket->head.next, head); 913 if (time - bo_gem->free_time <= 1) 914 break; 915 916 DRMLISTDEL(&bo_gem->head); 917 918 drm_intel_gem_bo_free(&bo_gem->bo); 919 } 920 } 921 922 bufmgr_gem->time = time; 923} 924 925static void 926drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 927{ 928 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 929 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 930 struct drm_intel_gem_bo_bucket *bucket; 931 int i; 932 933 /* Unreference all the target buffers */ 934 for (i = 0; i < bo_gem->reloc_count; i++) { 935 if (bo_gem->reloc_target_info[i].bo != bo) { 936 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 937 reloc_target_info[i].bo, 938 time); 939 } 940 } 941 bo_gem->reloc_count = 0; 942 bo_gem->used_as_reloc_target = false; 943 944 DBG("bo_unreference final: %d (%s)\n", 945 bo_gem->gem_handle, bo_gem->name); 946 947 /* release memory associated with this object */ 948 if (bo_gem->reloc_target_info) { 949 free(bo_gem->reloc_target_info); 950 bo_gem->reloc_target_info = NULL; 951 } 952 if (bo_gem->relocs) { 953 free(bo_gem->relocs); 954 bo_gem->relocs = NULL; 955 } 956 957 DRMLISTDEL(&bo_gem->name_list); 958 959 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 960 /* Put the buffer into our internal cache for reuse if we can. */ 961 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 962 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 963 I915_MADV_DONTNEED)) { 964 bo_gem->free_time = time; 965 966 bo_gem->name = NULL; 967 bo_gem->validate_index = -1; 968 969 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 970 } else { 971 drm_intel_gem_bo_free(bo); 972 } 973} 974 975static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 976 time_t time) 977{ 978 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 979 980 assert(atomic_read(&bo_gem->refcount) > 0); 981 if (atomic_dec_and_test(&bo_gem->refcount)) 982 drm_intel_gem_bo_unreference_final(bo, time); 983} 984 985static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 986{ 987 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 988 989 assert(atomic_read(&bo_gem->refcount) > 0); 990 if (atomic_dec_and_test(&bo_gem->refcount)) { 991 drm_intel_bufmgr_gem *bufmgr_gem = 992 (drm_intel_bufmgr_gem *) bo->bufmgr; 993 struct timespec time; 994 995 clock_gettime(CLOCK_MONOTONIC, &time); 996 997 pthread_mutex_lock(&bufmgr_gem->lock); 998 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 999 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1000 pthread_mutex_unlock(&bufmgr_gem->lock); 1001 } 1002} 1003 1004static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1005{ 1006 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1007 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1008 struct drm_i915_gem_set_domain set_domain; 1009 int ret; 1010 1011 pthread_mutex_lock(&bufmgr_gem->lock); 1012 1013 /* Allow recursive mapping. Mesa may recursively map buffers with 1014 * nested display loops. 1015 */ 1016 if (!bo_gem->mem_virtual) { 1017 struct drm_i915_gem_mmap mmap_arg; 1018 1019 DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); 1020 1021 memset(&mmap_arg, 0, sizeof(mmap_arg)); 1022 mmap_arg.handle = bo_gem->gem_handle; 1023 mmap_arg.offset = 0; 1024 mmap_arg.size = bo->size; 1025 ret = drmIoctl(bufmgr_gem->fd, 1026 DRM_IOCTL_I915_GEM_MMAP, 1027 &mmap_arg); 1028 if (ret != 0) { 1029 ret = -errno; 1030 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1031 __FILE__, __LINE__, bo_gem->gem_handle, 1032 bo_gem->name, strerror(errno)); 1033 pthread_mutex_unlock(&bufmgr_gem->lock); 1034 return ret; 1035 } 1036 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1037 } 1038 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1039 bo_gem->mem_virtual); 1040 bo->virtual = bo_gem->mem_virtual; 1041 1042 set_domain.handle = bo_gem->gem_handle; 1043 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1044 if (write_enable) 1045 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1046 else 1047 set_domain.write_domain = 0; 1048 ret = drmIoctl(bufmgr_gem->fd, 1049 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1050 &set_domain); 1051 if (ret != 0) { 1052 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1053 __FILE__, __LINE__, bo_gem->gem_handle, 1054 strerror(errno)); 1055 } 1056 1057 pthread_mutex_unlock(&bufmgr_gem->lock); 1058 1059 return 0; 1060} 1061 1062int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1063{ 1064 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1065 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1066 struct drm_i915_gem_set_domain set_domain; 1067 int ret; 1068 1069 pthread_mutex_lock(&bufmgr_gem->lock); 1070 1071 /* Get a mapping of the buffer if we haven't before. */ 1072 if (bo_gem->gtt_virtual == NULL) { 1073 struct drm_i915_gem_mmap_gtt mmap_arg; 1074 1075 DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle, 1076 bo_gem->name); 1077 1078 memset(&mmap_arg, 0, sizeof(mmap_arg)); 1079 mmap_arg.handle = bo_gem->gem_handle; 1080 1081 /* Get the fake offset back... */ 1082 ret = drmIoctl(bufmgr_gem->fd, 1083 DRM_IOCTL_I915_GEM_MMAP_GTT, 1084 &mmap_arg); 1085 if (ret != 0) { 1086 ret = -errno; 1087 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1088 __FILE__, __LINE__, 1089 bo_gem->gem_handle, bo_gem->name, 1090 strerror(errno)); 1091 pthread_mutex_unlock(&bufmgr_gem->lock); 1092 return ret; 1093 } 1094 1095 /* and mmap it */ 1096 bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE, 1097 MAP_SHARED, bufmgr_gem->fd, 1098 mmap_arg.offset); 1099 if (bo_gem->gtt_virtual == MAP_FAILED) { 1100 bo_gem->gtt_virtual = NULL; 1101 ret = -errno; 1102 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1103 __FILE__, __LINE__, 1104 bo_gem->gem_handle, bo_gem->name, 1105 strerror(errno)); 1106 pthread_mutex_unlock(&bufmgr_gem->lock); 1107 return ret; 1108 } 1109 } 1110 1111 bo->virtual = bo_gem->gtt_virtual; 1112 1113 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1114 bo_gem->gtt_virtual); 1115 1116 /* Now move it to the GTT domain so that the CPU caches are flushed */ 1117 set_domain.handle = bo_gem->gem_handle; 1118 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1119 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1120 ret = drmIoctl(bufmgr_gem->fd, 1121 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1122 &set_domain); 1123 if (ret != 0) { 1124 DBG("%s:%d: Error setting domain %d: %s\n", 1125 __FILE__, __LINE__, bo_gem->gem_handle, 1126 strerror(errno)); 1127 } 1128 1129 pthread_mutex_unlock(&bufmgr_gem->lock); 1130 1131 return 0; 1132} 1133 1134int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1135{ 1136 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1137 int ret = 0; 1138 1139 if (bo == NULL) 1140 return 0; 1141 1142 pthread_mutex_lock(&bufmgr_gem->lock); 1143 bo->virtual = NULL; 1144 pthread_mutex_unlock(&bufmgr_gem->lock); 1145 1146 return ret; 1147} 1148 1149static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1150{ 1151 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1152 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1153 struct drm_i915_gem_sw_finish sw_finish; 1154 int ret; 1155 1156 if (bo == NULL) 1157 return 0; 1158 1159 pthread_mutex_lock(&bufmgr_gem->lock); 1160 1161 /* Cause a flush to happen if the buffer's pinned for scanout, so the 1162 * results show up in a timely manner. 1163 */ 1164 sw_finish.handle = bo_gem->gem_handle; 1165 ret = drmIoctl(bufmgr_gem->fd, 1166 DRM_IOCTL_I915_GEM_SW_FINISH, 1167 &sw_finish); 1168 ret = ret == -1 ? -errno : 0; 1169 1170 bo->virtual = NULL; 1171 pthread_mutex_unlock(&bufmgr_gem->lock); 1172 1173 return ret; 1174} 1175 1176static int 1177drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1178 unsigned long size, const void *data) 1179{ 1180 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1181 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1182 struct drm_i915_gem_pwrite pwrite; 1183 int ret; 1184 1185 memset(&pwrite, 0, sizeof(pwrite)); 1186 pwrite.handle = bo_gem->gem_handle; 1187 pwrite.offset = offset; 1188 pwrite.size = size; 1189 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1190 ret = drmIoctl(bufmgr_gem->fd, 1191 DRM_IOCTL_I915_GEM_PWRITE, 1192 &pwrite); 1193 if (ret != 0) { 1194 ret = -errno; 1195 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1196 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1197 (int)size, strerror(errno)); 1198 } 1199 1200 return ret; 1201} 1202 1203static int 1204drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1205{ 1206 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1207 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1208 int ret; 1209 1210 get_pipe_from_crtc_id.crtc_id = crtc_id; 1211 ret = drmIoctl(bufmgr_gem->fd, 1212 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1213 &get_pipe_from_crtc_id); 1214 if (ret != 0) { 1215 /* We return -1 here to signal that we don't 1216 * know which pipe is associated with this crtc. 1217 * This lets the caller know that this information 1218 * isn't available; using the wrong pipe for 1219 * vblank waiting can cause the chipset to lock up 1220 */ 1221 return -1; 1222 } 1223 1224 return get_pipe_from_crtc_id.pipe; 1225} 1226 1227static int 1228drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1229 unsigned long size, void *data) 1230{ 1231 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1232 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1233 struct drm_i915_gem_pread pread; 1234 int ret; 1235 1236 memset(&pread, 0, sizeof(pread)); 1237 pread.handle = bo_gem->gem_handle; 1238 pread.offset = offset; 1239 pread.size = size; 1240 pread.data_ptr = (uint64_t) (uintptr_t) data; 1241 ret = drmIoctl(bufmgr_gem->fd, 1242 DRM_IOCTL_I915_GEM_PREAD, 1243 &pread); 1244 if (ret != 0) { 1245 ret = -errno; 1246 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1247 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1248 (int)size, strerror(errno)); 1249 } 1250 1251 return ret; 1252} 1253 1254/** Waits for all GPU rendering with the object to have completed. */ 1255static void 1256drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1257{ 1258 drm_intel_gem_bo_start_gtt_access(bo, 1); 1259} 1260 1261/** 1262 * Sets the object to the GTT read and possibly write domain, used by the X 1263 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 1264 * 1265 * In combination with drm_intel_gem_bo_pin() and manual fence management, we 1266 * can do tiled pixmaps this way. 1267 */ 1268void 1269drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 1270{ 1271 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1272 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1273 struct drm_i915_gem_set_domain set_domain; 1274 int ret; 1275 1276 set_domain.handle = bo_gem->gem_handle; 1277 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1278 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; 1279 ret = drmIoctl(bufmgr_gem->fd, 1280 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1281 &set_domain); 1282 if (ret != 0) { 1283 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1284 __FILE__, __LINE__, bo_gem->gem_handle, 1285 set_domain.read_domains, set_domain.write_domain, 1286 strerror(errno)); 1287 } 1288} 1289 1290static void 1291drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1292{ 1293 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1294 int i; 1295 1296 free(bufmgr_gem->exec2_objects); 1297 free(bufmgr_gem->exec_objects); 1298 free(bufmgr_gem->exec_bos); 1299 1300 pthread_mutex_destroy(&bufmgr_gem->lock); 1301 1302 /* Free any cached buffer objects we were going to reuse */ 1303 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1304 struct drm_intel_gem_bo_bucket *bucket = 1305 &bufmgr_gem->cache_bucket[i]; 1306 drm_intel_bo_gem *bo_gem; 1307 1308 while (!DRMLISTEMPTY(&bucket->head)) { 1309 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1310 bucket->head.next, head); 1311 DRMLISTDEL(&bo_gem->head); 1312 1313 drm_intel_gem_bo_free(&bo_gem->bo); 1314 } 1315 } 1316 1317 free(bufmgr); 1318} 1319 1320/** 1321 * Adds the target buffer to the validation list and adds the relocation 1322 * to the reloc_buffer's relocation list. 1323 * 1324 * The relocation entry at the given offset must already contain the 1325 * precomputed relocation value, because the kernel will optimize out 1326 * the relocation entry write when the buffer hasn't moved from the 1327 * last known offset in target_bo. 1328 */ 1329static int 1330do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1331 drm_intel_bo *target_bo, uint32_t target_offset, 1332 uint32_t read_domains, uint32_t write_domain, 1333 bool need_fence) 1334{ 1335 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1336 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1337 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1338 bool fenced_command; 1339 1340 if (bo_gem->has_error) 1341 return -ENOMEM; 1342 1343 if (target_bo_gem->has_error) { 1344 bo_gem->has_error = true; 1345 return -ENOMEM; 1346 } 1347 1348 /* We never use HW fences for rendering on 965+ */ 1349 if (bufmgr_gem->gen >= 4) 1350 need_fence = false; 1351 1352 fenced_command = need_fence; 1353 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1354 need_fence = false; 1355 1356 /* Create a new relocation list if needed */ 1357 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1358 return -ENOMEM; 1359 1360 /* Check overflow */ 1361 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1362 1363 /* Check args */ 1364 assert(offset <= bo->size - 4); 1365 assert((write_domain & (write_domain - 1)) == 0); 1366 1367 /* Make sure that we're not adding a reloc to something whose size has 1368 * already been accounted for. 1369 */ 1370 assert(!bo_gem->used_as_reloc_target); 1371 if (target_bo_gem != bo_gem) { 1372 target_bo_gem->used_as_reloc_target = true; 1373 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1374 } 1375 /* An object needing a fence is a tiled buffer, so it won't have 1376 * relocs to other buffers. 1377 */ 1378 if (need_fence) 1379 target_bo_gem->reloc_tree_fences = 1; 1380 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1381 1382 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1383 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1384 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1385 target_bo_gem->gem_handle; 1386 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1387 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1388 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; 1389 1390 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1391 if (target_bo != bo) 1392 drm_intel_gem_bo_reference(target_bo); 1393 if (fenced_command) 1394 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1395 DRM_INTEL_RELOC_FENCE; 1396 else 1397 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1398 1399 bo_gem->reloc_count++; 1400 1401 return 0; 1402} 1403 1404static int 1405drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1406 drm_intel_bo *target_bo, uint32_t target_offset, 1407 uint32_t read_domains, uint32_t write_domain) 1408{ 1409 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1410 1411 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1412 read_domains, write_domain, 1413 !bufmgr_gem->fenced_relocs); 1414} 1415 1416static int 1417drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1418 drm_intel_bo *target_bo, 1419 uint32_t target_offset, 1420 uint32_t read_domains, uint32_t write_domain) 1421{ 1422 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1423 read_domains, write_domain, true); 1424} 1425 1426int 1427drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 1428{ 1429 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1430 1431 return bo_gem->reloc_count; 1432} 1433 1434/** 1435 * Removes existing relocation entries in the BO after "start". 1436 * 1437 * This allows a user to avoid a two-step process for state setup with 1438 * counting up all the buffer objects and doing a 1439 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 1440 * relocations for the state setup. Instead, save the state of the 1441 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 1442 * state, and then check if it still fits in the aperture. 1443 * 1444 * Any further drm_intel_bufmgr_check_aperture_space() queries 1445 * involving this buffer in the tree are undefined after this call. 1446 */ 1447void 1448drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 1449{ 1450 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1451 int i; 1452 struct timespec time; 1453 1454 clock_gettime(CLOCK_MONOTONIC, &time); 1455 1456 assert(bo_gem->reloc_count >= start); 1457 /* Unreference the cleared target buffers */ 1458 for (i = start; i < bo_gem->reloc_count; i++) { 1459 if (bo_gem->reloc_target_info[i].bo != bo) { 1460 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1461 reloc_target_info[i].bo, 1462 time.tv_sec); 1463 } 1464 } 1465 bo_gem->reloc_count = start; 1466} 1467 1468/** 1469 * Walk the tree of relocations rooted at BO and accumulate the list of 1470 * validations to be performed and update the relocation buffers with 1471 * index values into the validation list. 1472 */ 1473static void 1474drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1475{ 1476 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1477 int i; 1478 1479 if (bo_gem->relocs == NULL) 1480 return; 1481 1482 for (i = 0; i < bo_gem->reloc_count; i++) { 1483 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1484 1485 if (target_bo == bo) 1486 continue; 1487 1488 /* Continue walking the tree depth-first. */ 1489 drm_intel_gem_bo_process_reloc(target_bo); 1490 1491 /* Add the target to the validate list */ 1492 drm_intel_add_validate_buffer(target_bo); 1493 } 1494} 1495 1496static void 1497drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1498{ 1499 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1500 int i; 1501 1502 if (bo_gem->relocs == NULL) 1503 return; 1504 1505 for (i = 0; i < bo_gem->reloc_count; i++) { 1506 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1507 int need_fence; 1508 1509 if (target_bo == bo) 1510 continue; 1511 1512 /* Continue walking the tree depth-first. */ 1513 drm_intel_gem_bo_process_reloc2(target_bo); 1514 1515 need_fence = (bo_gem->reloc_target_info[i].flags & 1516 DRM_INTEL_RELOC_FENCE); 1517 1518 /* Add the target to the validate list */ 1519 drm_intel_add_validate_buffer2(target_bo, need_fence); 1520 } 1521} 1522 1523 1524static void 1525drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1526{ 1527 int i; 1528 1529 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1530 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1531 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1532 1533 /* Update the buffer offset */ 1534 if (bufmgr_gem->exec_objects[i].offset != bo->offset) { 1535 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1536 bo_gem->gem_handle, bo_gem->name, bo->offset, 1537 (unsigned long long)bufmgr_gem->exec_objects[i]. 1538 offset); 1539 bo->offset = bufmgr_gem->exec_objects[i].offset; 1540 } 1541 } 1542} 1543 1544static void 1545drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1546{ 1547 int i; 1548 1549 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1550 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1551 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1552 1553 /* Update the buffer offset */ 1554 if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { 1555 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1556 bo_gem->gem_handle, bo_gem->name, bo->offset, 1557 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1558 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1559 } 1560 } 1561} 1562 1563static int 1564drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 1565 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 1566{ 1567 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1568 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1569 struct drm_i915_gem_execbuffer execbuf; 1570 int ret, i; 1571 1572 if (bo_gem->has_error) 1573 return -ENOMEM; 1574 1575 pthread_mutex_lock(&bufmgr_gem->lock); 1576 /* Update indices and set up the validate list. */ 1577 drm_intel_gem_bo_process_reloc(bo); 1578 1579 /* Add the batch buffer to the validation list. There are no 1580 * relocations pointing to it. 1581 */ 1582 drm_intel_add_validate_buffer(bo); 1583 1584 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 1585 execbuf.buffer_count = bufmgr_gem->exec_count; 1586 execbuf.batch_start_offset = 0; 1587 execbuf.batch_len = used; 1588 execbuf.cliprects_ptr = (uintptr_t) cliprects; 1589 execbuf.num_cliprects = num_cliprects; 1590 execbuf.DR1 = 0; 1591 execbuf.DR4 = DR4; 1592 1593 ret = drmIoctl(bufmgr_gem->fd, 1594 DRM_IOCTL_I915_GEM_EXECBUFFER, 1595 &execbuf); 1596 if (ret != 0) { 1597 ret = -errno; 1598 if (errno == ENOSPC) { 1599 DBG("Execbuffer fails to pin. " 1600 "Estimate: %u. Actual: %u. Available: %u\n", 1601 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1602 bufmgr_gem-> 1603 exec_count), 1604 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1605 bufmgr_gem-> 1606 exec_count), 1607 (unsigned int)bufmgr_gem->gtt_size); 1608 } 1609 } 1610 drm_intel_update_buffer_offsets(bufmgr_gem); 1611 1612 if (bufmgr_gem->bufmgr.debug) 1613 drm_intel_gem_dump_validation_list(bufmgr_gem); 1614 1615 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1616 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1617 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1618 1619 /* Disconnect the buffer from the validate list */ 1620 bo_gem->validate_index = -1; 1621 bufmgr_gem->exec_bos[i] = NULL; 1622 } 1623 bufmgr_gem->exec_count = 0; 1624 pthread_mutex_unlock(&bufmgr_gem->lock); 1625 1626 return ret; 1627} 1628 1629static int 1630drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 1631 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 1632 unsigned int flags) 1633{ 1634 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1635 struct drm_i915_gem_execbuffer2 execbuf; 1636 int ret, i; 1637 1638 switch (flags & 0x7) { 1639 default: 1640 return -EINVAL; 1641 case I915_EXEC_BLT: 1642 if (!bufmgr_gem->has_blt) 1643 return -EINVAL; 1644 break; 1645 case I915_EXEC_BSD: 1646 if (!bufmgr_gem->has_bsd) 1647 return -EINVAL; 1648 break; 1649 case I915_EXEC_RENDER: 1650 case I915_EXEC_DEFAULT: 1651 break; 1652 } 1653 1654 pthread_mutex_lock(&bufmgr_gem->lock); 1655 /* Update indices and set up the validate list. */ 1656 drm_intel_gem_bo_process_reloc2(bo); 1657 1658 /* Add the batch buffer to the validation list. There are no relocations 1659 * pointing to it. 1660 */ 1661 drm_intel_add_validate_buffer2(bo, 0); 1662 1663 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 1664 execbuf.buffer_count = bufmgr_gem->exec_count; 1665 execbuf.batch_start_offset = 0; 1666 execbuf.batch_len = used; 1667 execbuf.cliprects_ptr = (uintptr_t)cliprects; 1668 execbuf.num_cliprects = num_cliprects; 1669 execbuf.DR1 = 0; 1670 execbuf.DR4 = DR4; 1671 execbuf.flags = flags; 1672 execbuf.rsvd1 = 0; 1673 execbuf.rsvd2 = 0; 1674 1675 ret = drmIoctl(bufmgr_gem->fd, 1676 DRM_IOCTL_I915_GEM_EXECBUFFER2, 1677 &execbuf); 1678 if (ret != 0) { 1679 ret = -errno; 1680 if (ret == -ENOSPC) { 1681 DBG("Execbuffer fails to pin. " 1682 "Estimate: %u. Actual: %u. Available: %u\n", 1683 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1684 bufmgr_gem->exec_count), 1685 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1686 bufmgr_gem->exec_count), 1687 (unsigned int) bufmgr_gem->gtt_size); 1688 } 1689 } 1690 drm_intel_update_buffer_offsets2(bufmgr_gem); 1691 1692 if (bufmgr_gem->bufmgr.debug) 1693 drm_intel_gem_dump_validation_list(bufmgr_gem); 1694 1695 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1696 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1697 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1698 1699 /* Disconnect the buffer from the validate list */ 1700 bo_gem->validate_index = -1; 1701 bufmgr_gem->exec_bos[i] = NULL; 1702 } 1703 bufmgr_gem->exec_count = 0; 1704 pthread_mutex_unlock(&bufmgr_gem->lock); 1705 1706 return ret; 1707} 1708 1709static int 1710drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 1711 drm_clip_rect_t *cliprects, int num_cliprects, 1712 int DR4) 1713{ 1714 return drm_intel_gem_bo_mrb_exec2(bo, used, 1715 cliprects, num_cliprects, DR4, 1716 I915_EXEC_RENDER); 1717} 1718 1719static int 1720drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 1721{ 1722 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1723 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1724 struct drm_i915_gem_pin pin; 1725 int ret; 1726 1727 memset(&pin, 0, sizeof(pin)); 1728 pin.handle = bo_gem->gem_handle; 1729 pin.alignment = alignment; 1730 1731 ret = drmIoctl(bufmgr_gem->fd, 1732 DRM_IOCTL_I915_GEM_PIN, 1733 &pin); 1734 if (ret != 0) 1735 return -errno; 1736 1737 bo->offset = pin.offset; 1738 return 0; 1739} 1740 1741static int 1742drm_intel_gem_bo_unpin(drm_intel_bo *bo) 1743{ 1744 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1745 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1746 struct drm_i915_gem_unpin unpin; 1747 int ret; 1748 1749 memset(&unpin, 0, sizeof(unpin)); 1750 unpin.handle = bo_gem->gem_handle; 1751 1752 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 1753 if (ret != 0) 1754 return -errno; 1755 1756 return 0; 1757} 1758 1759static int 1760drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 1761 uint32_t tiling_mode, 1762 uint32_t stride) 1763{ 1764 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1765 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1766 struct drm_i915_gem_set_tiling set_tiling; 1767 int ret; 1768 1769 if (bo_gem->global_name == 0 && 1770 tiling_mode == bo_gem->tiling_mode && 1771 stride == bo_gem->stride) 1772 return 0; 1773 1774 memset(&set_tiling, 0, sizeof(set_tiling)); 1775 do { 1776 /* set_tiling is slightly broken and overwrites the 1777 * input on the error path, so we have to open code 1778 * rmIoctl. 1779 */ 1780 set_tiling.handle = bo_gem->gem_handle; 1781 set_tiling.tiling_mode = tiling_mode; 1782 set_tiling.stride = stride; 1783 1784 ret = ioctl(bufmgr_gem->fd, 1785 DRM_IOCTL_I915_GEM_SET_TILING, 1786 &set_tiling); 1787 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 1788 if (ret == -1) 1789 return -errno; 1790 1791 bo_gem->tiling_mode = set_tiling.tiling_mode; 1792 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 1793 bo_gem->stride = set_tiling.stride; 1794 return 0; 1795} 1796 1797static int 1798drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1799 uint32_t stride) 1800{ 1801 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1802 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1803 int ret; 1804 1805 /* Linear buffers have no stride. By ensuring that we only ever use 1806 * stride 0 with linear buffers, we simplify our code. 1807 */ 1808 if (*tiling_mode == I915_TILING_NONE) 1809 stride = 0; 1810 1811 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 1812 if (ret == 0) 1813 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 1814 1815 *tiling_mode = bo_gem->tiling_mode; 1816 return ret; 1817} 1818 1819static int 1820drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1821 uint32_t * swizzle_mode) 1822{ 1823 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1824 1825 *tiling_mode = bo_gem->tiling_mode; 1826 *swizzle_mode = bo_gem->swizzle_mode; 1827 return 0; 1828} 1829 1830static int 1831drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 1832{ 1833 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1834 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1835 struct drm_gem_flink flink; 1836 int ret; 1837 1838 if (!bo_gem->global_name) { 1839 memset(&flink, 0, sizeof(flink)); 1840 flink.handle = bo_gem->gem_handle; 1841 1842 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 1843 if (ret != 0) 1844 return -errno; 1845 bo_gem->global_name = flink.name; 1846 bo_gem->reusable = false; 1847 1848 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 1849 } 1850 1851 *name = bo_gem->global_name; 1852 return 0; 1853} 1854 1855/** 1856 * Enables unlimited caching of buffer objects for reuse. 1857 * 1858 * This is potentially very memory expensive, as the cache at each bucket 1859 * size is only bounded by how many buffers of that size we've managed to have 1860 * in flight at once. 1861 */ 1862void 1863drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 1864{ 1865 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1866 1867 bufmgr_gem->bo_reuse = true; 1868} 1869 1870/** 1871 * Enable use of fenced reloc type. 1872 * 1873 * New code should enable this to avoid unnecessary fence register 1874 * allocation. If this option is not enabled, all relocs will have fence 1875 * register allocated. 1876 */ 1877void 1878drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 1879{ 1880 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 1881 1882 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 1883 bufmgr_gem->fenced_relocs = true; 1884} 1885 1886/** 1887 * Return the additional aperture space required by the tree of buffer objects 1888 * rooted at bo. 1889 */ 1890static int 1891drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 1892{ 1893 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1894 int i; 1895 int total = 0; 1896 1897 if (bo == NULL || bo_gem->included_in_check_aperture) 1898 return 0; 1899 1900 total += bo->size; 1901 bo_gem->included_in_check_aperture = true; 1902 1903 for (i = 0; i < bo_gem->reloc_count; i++) 1904 total += 1905 drm_intel_gem_bo_get_aperture_space(bo_gem-> 1906 reloc_target_info[i].bo); 1907 1908 return total; 1909} 1910 1911/** 1912 * Count the number of buffers in this list that need a fence reg 1913 * 1914 * If the count is greater than the number of available regs, we'll have 1915 * to ask the caller to resubmit a batch with fewer tiled buffers. 1916 * 1917 * This function over-counts if the same buffer is used multiple times. 1918 */ 1919static unsigned int 1920drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 1921{ 1922 int i; 1923 unsigned int total = 0; 1924 1925 for (i = 0; i < count; i++) { 1926 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1927 1928 if (bo_gem == NULL) 1929 continue; 1930 1931 total += bo_gem->reloc_tree_fences; 1932 } 1933 return total; 1934} 1935 1936/** 1937 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 1938 * for the next drm_intel_bufmgr_check_aperture_space() call. 1939 */ 1940static void 1941drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 1942{ 1943 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1944 int i; 1945 1946 if (bo == NULL || !bo_gem->included_in_check_aperture) 1947 return; 1948 1949 bo_gem->included_in_check_aperture = false; 1950 1951 for (i = 0; i < bo_gem->reloc_count; i++) 1952 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 1953 reloc_target_info[i].bo); 1954} 1955 1956/** 1957 * Return a conservative estimate for the amount of aperture required 1958 * for a collection of buffers. This may double-count some buffers. 1959 */ 1960static unsigned int 1961drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 1962{ 1963 int i; 1964 unsigned int total = 0; 1965 1966 for (i = 0; i < count; i++) { 1967 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1968 if (bo_gem != NULL) 1969 total += bo_gem->reloc_tree_size; 1970 } 1971 return total; 1972} 1973 1974/** 1975 * Return the amount of aperture needed for a collection of buffers. 1976 * This avoids double counting any buffers, at the cost of looking 1977 * at every buffer in the set. 1978 */ 1979static unsigned int 1980drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 1981{ 1982 int i; 1983 unsigned int total = 0; 1984 1985 for (i = 0; i < count; i++) { 1986 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 1987 /* For the first buffer object in the array, we get an 1988 * accurate count back for its reloc_tree size (since nothing 1989 * had been flagged as being counted yet). We can save that 1990 * value out as a more conservative reloc_tree_size that 1991 * avoids double-counting target buffers. Since the first 1992 * buffer happens to usually be the batch buffer in our 1993 * callers, this can pull us back from doing the tree 1994 * walk on every new batch emit. 1995 */ 1996 if (i == 0) { 1997 drm_intel_bo_gem *bo_gem = 1998 (drm_intel_bo_gem *) bo_array[i]; 1999 bo_gem->reloc_tree_size = total; 2000 } 2001 } 2002 2003 for (i = 0; i < count; i++) 2004 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2005 return total; 2006} 2007 2008/** 2009 * Return -1 if the batchbuffer should be flushed before attempting to 2010 * emit rendering referencing the buffers pointed to by bo_array. 2011 * 2012 * This is required because if we try to emit a batchbuffer with relocations 2013 * to a tree of buffers that won't simultaneously fit in the aperture, 2014 * the rendering will return an error at a point where the software is not 2015 * prepared to recover from it. 2016 * 2017 * However, we also want to emit the batchbuffer significantly before we reach 2018 * the limit, as a series of batchbuffers each of which references buffers 2019 * covering almost all of the aperture means that at each emit we end up 2020 * waiting to evict a buffer from the last rendering, and we get synchronous 2021 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 2022 * get better parallelism. 2023 */ 2024static int 2025drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2026{ 2027 drm_intel_bufmgr_gem *bufmgr_gem = 2028 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2029 unsigned int total = 0; 2030 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2031 int total_fences; 2032 2033 /* Check for fence reg constraints if necessary */ 2034 if (bufmgr_gem->available_fences) { 2035 total_fences = drm_intel_gem_total_fences(bo_array, count); 2036 if (total_fences > bufmgr_gem->available_fences) 2037 return -ENOSPC; 2038 } 2039 2040 total = drm_intel_gem_estimate_batch_space(bo_array, count); 2041 2042 if (total > threshold) 2043 total = drm_intel_gem_compute_batch_space(bo_array, count); 2044 2045 if (total > threshold) { 2046 DBG("check_space: overflowed available aperture, " 2047 "%dkb vs %dkb\n", 2048 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 2049 return -ENOSPC; 2050 } else { 2051 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 2052 (int)bufmgr_gem->gtt_size / 1024); 2053 return 0; 2054 } 2055} 2056 2057/* 2058 * Disable buffer reuse for objects which are shared with the kernel 2059 * as scanout buffers 2060 */ 2061static int 2062drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 2063{ 2064 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2065 2066 bo_gem->reusable = false; 2067 return 0; 2068} 2069 2070static int 2071drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 2072{ 2073 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2074 2075 return bo_gem->reusable; 2076} 2077 2078static int 2079_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2080{ 2081 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2082 int i; 2083 2084 for (i = 0; i < bo_gem->reloc_count; i++) { 2085 if (bo_gem->reloc_target_info[i].bo == target_bo) 2086 return 1; 2087 if (bo == bo_gem->reloc_target_info[i].bo) 2088 continue; 2089 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 2090 target_bo)) 2091 return 1; 2092 } 2093 2094 return 0; 2095} 2096 2097/** Return true if target_bo is referenced by bo's relocation tree. */ 2098static int 2099drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2100{ 2101 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2102 2103 if (bo == NULL || target_bo == NULL) 2104 return 0; 2105 if (target_bo_gem->used_as_reloc_target) 2106 return _drm_intel_gem_bo_references(bo, target_bo); 2107 return 0; 2108} 2109 2110static void 2111add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 2112{ 2113 unsigned int i = bufmgr_gem->num_buckets; 2114 2115 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 2116 2117 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 2118 bufmgr_gem->cache_bucket[i].size = size; 2119 bufmgr_gem->num_buckets++; 2120} 2121 2122static void 2123init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 2124{ 2125 unsigned long size, cache_max_size = 64 * 1024 * 1024; 2126 2127 /* OK, so power of two buckets was too wasteful of memory. 2128 * Give 3 other sizes between each power of two, to hopefully 2129 * cover things accurately enough. (The alternative is 2130 * probably to just go for exact matching of sizes, and assume 2131 * that for things like composited window resize the tiled 2132 * width/height alignment and rounding of sizes to pages will 2133 * get us useful cache hit rates anyway) 2134 */ 2135 add_bucket(bufmgr_gem, 4096); 2136 add_bucket(bufmgr_gem, 4096 * 2); 2137 add_bucket(bufmgr_gem, 4096 * 3); 2138 2139 /* Initialize the linked lists for BO reuse cache. */ 2140 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 2141 add_bucket(bufmgr_gem, size); 2142 2143 add_bucket(bufmgr_gem, size + size * 1 / 4); 2144 add_bucket(bufmgr_gem, size + size * 2 / 4); 2145 add_bucket(bufmgr_gem, size + size * 3 / 4); 2146 } 2147} 2148 2149/** 2150 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 2151 * and manage map buffer objections. 2152 * 2153 * \param fd File descriptor of the opened DRM device. 2154 */ 2155drm_intel_bufmgr * 2156drm_intel_bufmgr_gem_init(int fd, int batch_size) 2157{ 2158 drm_intel_bufmgr_gem *bufmgr_gem; 2159 struct drm_i915_gem_get_aperture aperture; 2160 drm_i915_getparam_t gp; 2161 int ret, tmp; 2162 bool exec2 = false; 2163 2164 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 2165 if (bufmgr_gem == NULL) 2166 return NULL; 2167 2168 bufmgr_gem->fd = fd; 2169 2170 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 2171 free(bufmgr_gem); 2172 return NULL; 2173 } 2174 2175 ret = drmIoctl(bufmgr_gem->fd, 2176 DRM_IOCTL_I915_GEM_GET_APERTURE, 2177 &aperture); 2178 2179 if (ret == 0) 2180 bufmgr_gem->gtt_size = aperture.aper_available_size; 2181 else { 2182 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 2183 strerror(errno)); 2184 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 2185 fprintf(stderr, "Assuming %dkB available aperture size.\n" 2186 "May lead to reduced performance or incorrect " 2187 "rendering.\n", 2188 (int)bufmgr_gem->gtt_size / 1024); 2189 } 2190 2191 gp.param = I915_PARAM_CHIPSET_ID; 2192 gp.value = &bufmgr_gem->pci_device; 2193 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2194 if (ret) { 2195 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 2196 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 2197 } 2198 2199 if (IS_GEN2(bufmgr_gem)) 2200 bufmgr_gem->gen = 2; 2201 else if (IS_GEN3(bufmgr_gem)) 2202 bufmgr_gem->gen = 3; 2203 else if (IS_GEN4(bufmgr_gem)) 2204 bufmgr_gem->gen = 4; 2205 else 2206 bufmgr_gem->gen = 6; 2207 2208 gp.value = &tmp; 2209 2210 gp.param = I915_PARAM_HAS_EXECBUF2; 2211 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2212 if (!ret) 2213 exec2 = true; 2214 2215 gp.param = I915_PARAM_HAS_BSD; 2216 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2217 bufmgr_gem->has_bsd = ret == 0; 2218 2219 gp.param = I915_PARAM_HAS_BLT; 2220 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2221 bufmgr_gem->has_blt = ret == 0; 2222 2223 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 2224 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2225 bufmgr_gem->has_relaxed_fencing = ret == 0; 2226 2227 if (bufmgr_gem->gen < 4) { 2228 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 2229 gp.value = &bufmgr_gem->available_fences; 2230 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2231 if (ret) { 2232 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 2233 errno); 2234 fprintf(stderr, "param: %d, val: %d\n", gp.param, 2235 *gp.value); 2236 bufmgr_gem->available_fences = 0; 2237 } else { 2238 /* XXX The kernel reports the total number of fences, 2239 * including any that may be pinned. 2240 * 2241 * We presume that there will be at least one pinned 2242 * fence for the scanout buffer, but there may be more 2243 * than one scanout and the user may be manually 2244 * pinning buffers. Let's move to execbuffer2 and 2245 * thereby forget the insanity of using fences... 2246 */ 2247 bufmgr_gem->available_fences -= 2; 2248 if (bufmgr_gem->available_fences < 0) 2249 bufmgr_gem->available_fences = 0; 2250 } 2251 } 2252 2253 /* Let's go with one relocation per every 2 dwords (but round down a bit 2254 * since a power of two will mean an extra page allocation for the reloc 2255 * buffer). 2256 * 2257 * Every 4 was too few for the blender benchmark. 2258 */ 2259 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 2260 2261 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 2262 bufmgr_gem->bufmgr.bo_alloc_for_render = 2263 drm_intel_gem_bo_alloc_for_render; 2264 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 2265 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 2266 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 2267 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 2268 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 2269 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 2270 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 2271 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 2272 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 2273 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 2274 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 2275 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 2276 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 2277 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 2278 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 2279 /* Use the new one if available */ 2280 if (exec2) { 2281 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 2282 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 2283 } else 2284 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 2285 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 2286 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 2287 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; 2288 bufmgr_gem->bufmgr.debug = 0; 2289 bufmgr_gem->bufmgr.check_aperture_space = 2290 drm_intel_gem_check_aperture_space; 2291 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 2292 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 2293 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 2294 drm_intel_gem_get_pipe_from_crtc_id; 2295 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 2296 2297 DRMINITLISTHEAD(&bufmgr_gem->named); 2298 init_cache_buckets(bufmgr_gem); 2299 2300 return &bufmgr_gem->bufmgr; 2301} 2302