intel_bufmgr_gem.c revision 4abb65f95c79c9a2ec2cc1147a753704b5cdd22e
1/************************************************************************** 2 * 3 * Copyright � 2007 Red Hat Inc. 4 * Copyright � 2007 Intel Corporation 5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA 6 * All Rights Reserved. 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the 10 * "Software"), to deal in the Software without restriction, including 11 * without limitation the rights to use, copy, modify, merge, publish, 12 * distribute, sub license, and/or sell copies of the Software, and to 13 * permit persons to whom the Software is furnished to do so, subject to 14 * the following conditions: 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, 20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 22 * USE OR OTHER DEALINGS IN THE SOFTWARE. 23 * 24 * The above copyright notice and this permission notice (including the 25 * next paragraph) shall be included in all copies or substantial portions 26 * of the Software. 27 * 28 * 29 **************************************************************************/ 30/* 31 * Authors: Thomas Hellstr�m <thomas-at-tungstengraphics-dot-com> 32 * Keith Whitwell <keithw-at-tungstengraphics-dot-com> 33 * Eric Anholt <eric@anholt.net> 34 * Dave Airlie <airlied@linux.ie> 35 */ 36 37#ifdef HAVE_CONFIG_H 38#include "config.h" 39#endif 40 41#include <xf86drm.h> 42#include <xf86atomic.h> 43#include <fcntl.h> 44#include <stdio.h> 45#include <stdlib.h> 46#include <string.h> 47#include <unistd.h> 48#include <assert.h> 49#include <pthread.h> 50#include <sys/ioctl.h> 51#include <sys/mman.h> 52#include <sys/stat.h> 53#include <sys/types.h> 54 55#include "errno.h" 56#include "libdrm_lists.h" 57#include "intel_bufmgr.h" 58#include "intel_bufmgr_priv.h" 59#include "intel_chipset.h" 60#include "string.h" 61 62#include "i915_drm.h" 63 64#define DBG(...) do { \ 65 if (bufmgr_gem->bufmgr.debug) \ 66 fprintf(stderr, __VA_ARGS__); \ 67} while (0) 68 69#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) 70 71typedef struct _drm_intel_bo_gem drm_intel_bo_gem; 72 73struct drm_intel_gem_bo_bucket { 74 drmMMListHead head; 75 unsigned long size; 76}; 77 78typedef struct _drm_intel_bufmgr_gem { 79 drm_intel_bufmgr bufmgr; 80 81 int fd; 82 83 int max_relocs; 84 85 pthread_mutex_t lock; 86 87 struct drm_i915_gem_exec_object *exec_objects; 88 struct drm_i915_gem_exec_object2 *exec2_objects; 89 drm_intel_bo **exec_bos; 90 int exec_size; 91 int exec_count; 92 93 /** Array of lists of cached gem objects of power-of-two sizes */ 94 struct drm_intel_gem_bo_bucket cache_bucket[14 * 4]; 95 int num_buckets; 96 time_t time; 97 98 uint64_t gtt_size; 99 int available_fences; 100 int pci_device; 101 int gen; 102 unsigned int has_bsd : 1; 103 unsigned int has_blt : 1; 104 unsigned int has_relaxed_fencing : 1; 105 unsigned int bo_reuse : 1; 106 char fenced_relocs; 107} drm_intel_bufmgr_gem; 108 109#define DRM_INTEL_RELOC_FENCE (1<<0) 110 111typedef struct _drm_intel_reloc_target_info { 112 drm_intel_bo *bo; 113 int flags; 114} drm_intel_reloc_target; 115 116struct _drm_intel_bo_gem { 117 drm_intel_bo bo; 118 119 atomic_t refcount; 120 uint32_t gem_handle; 121 const char *name; 122 123 /** 124 * Kenel-assigned global name for this object 125 */ 126 unsigned int global_name; 127 128 /** 129 * Index of the buffer within the validation list while preparing a 130 * batchbuffer execution. 131 */ 132 int validate_index; 133 134 /** 135 * Current tiling mode 136 */ 137 uint32_t tiling_mode; 138 uint32_t swizzle_mode; 139 unsigned long stride; 140 141 time_t free_time; 142 143 /** Array passed to the DRM containing relocation information. */ 144 struct drm_i915_gem_relocation_entry *relocs; 145 /** 146 * Array of info structs corresponding to relocs[i].target_handle etc 147 */ 148 drm_intel_reloc_target *reloc_target_info; 149 /** Number of entries in relocs */ 150 int reloc_count; 151 /** Mapped address for the buffer, saved across map/unmap cycles */ 152 void *mem_virtual; 153 /** GTT virtual address for the buffer, saved across map/unmap cycles */ 154 void *gtt_virtual; 155 156 /** BO cache list */ 157 drmMMListHead head; 158 159 /** 160 * Boolean of whether this BO and its children have been included in 161 * the current drm_intel_bufmgr_check_aperture_space() total. 162 */ 163 char included_in_check_aperture; 164 165 /** 166 * Boolean of whether this buffer has been used as a relocation 167 * target and had its size accounted for, and thus can't have any 168 * further relocations added to it. 169 */ 170 char used_as_reloc_target; 171 172 /** 173 * Boolean of whether we have encountered an error whilst building the relocation tree. 174 */ 175 char has_error; 176 177 /** 178 * Boolean of whether this buffer can be re-used 179 */ 180 char reusable; 181 182 /** 183 * Size in bytes of this buffer and its relocation descendents. 184 * 185 * Used to avoid costly tree walking in 186 * drm_intel_bufmgr_check_aperture in the common case. 187 */ 188 int reloc_tree_size; 189 190 /** 191 * Number of potential fence registers required by this buffer and its 192 * relocations. 193 */ 194 int reloc_tree_fences; 195}; 196 197static unsigned int 198drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count); 199 200static unsigned int 201drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count); 202 203static int 204drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 205 uint32_t * swizzle_mode); 206 207static int 208drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 209 uint32_t tiling_mode, 210 uint32_t stride); 211 212static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 213 time_t time); 214 215static void drm_intel_gem_bo_unreference(drm_intel_bo *bo); 216 217static void drm_intel_gem_bo_free(drm_intel_bo *bo); 218 219static unsigned long 220drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size, 221 uint32_t *tiling_mode) 222{ 223 unsigned long min_size, max_size; 224 unsigned long i; 225 226 if (*tiling_mode == I915_TILING_NONE) 227 return size; 228 229 /* 965+ just need multiples of page size for tiling */ 230 if (bufmgr_gem->gen >= 4) 231 return ROUND_UP_TO(size, 4096); 232 233 /* Older chips need powers of two, of at least 512k or 1M */ 234 if (bufmgr_gem->gen == 3) { 235 min_size = 1024*1024; 236 max_size = 128*1024*1024; 237 } else { 238 min_size = 512*1024; 239 max_size = 64*1024*1024; 240 } 241 242 if (size > max_size) { 243 *tiling_mode = I915_TILING_NONE; 244 return size; 245 } 246 247 /* Do we need to allocate every page for the fence? */ 248 if (bufmgr_gem->has_relaxed_fencing) 249 return ROUND_UP_TO(size, 4096); 250 251 for (i = min_size; i < size; i <<= 1) 252 ; 253 254 return i; 255} 256 257/* 258 * Round a given pitch up to the minimum required for X tiling on a 259 * given chip. We use 512 as the minimum to allow for a later tiling 260 * change. 261 */ 262static unsigned long 263drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem, 264 unsigned long pitch, uint32_t *tiling_mode) 265{ 266 unsigned long tile_width; 267 unsigned long i; 268 269 /* If untiled, then just align it so that we can do rendering 270 * to it with the 3D engine. 271 */ 272 if (*tiling_mode == I915_TILING_NONE) 273 return ALIGN(pitch, 64); 274 275 if (*tiling_mode == I915_TILING_X) 276 tile_width = 512; 277 else 278 tile_width = 128; 279 280 /* 965 is flexible */ 281 if (bufmgr_gem->gen >= 4) 282 return ROUND_UP_TO(pitch, tile_width); 283 284 /* The older hardware has a maximum pitch of 8192 with tiled 285 * surfaces, so fallback to untiled if it's too large. 286 */ 287 if (pitch > 8192) { 288 *tiling_mode = I915_TILING_NONE; 289 return ALIGN(pitch, 64); 290 } 291 292 /* Pre-965 needs power of two tile width */ 293 for (i = tile_width; i < pitch; i <<= 1) 294 ; 295 296 return i; 297} 298 299static struct drm_intel_gem_bo_bucket * 300drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 301 unsigned long size) 302{ 303 int i; 304 305 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 306 struct drm_intel_gem_bo_bucket *bucket = 307 &bufmgr_gem->cache_bucket[i]; 308 if (bucket->size >= size) { 309 return bucket; 310 } 311 } 312 313 return NULL; 314} 315 316static void 317drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 318{ 319 int i, j; 320 321 for (i = 0; i < bufmgr_gem->exec_count; i++) { 322 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 323 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 324 325 if (bo_gem->relocs == NULL) { 326 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 327 bo_gem->name); 328 continue; 329 } 330 331 for (j = 0; j < bo_gem->reloc_count; j++) { 332 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 333 drm_intel_bo_gem *target_gem = 334 (drm_intel_bo_gem *) target_bo; 335 336 DBG("%2d: %d (%s)@0x%08llx -> " 337 "%d (%s)@0x%08lx + 0x%08x\n", 338 i, 339 bo_gem->gem_handle, bo_gem->name, 340 (unsigned long long)bo_gem->relocs[j].offset, 341 target_gem->gem_handle, 342 target_gem->name, 343 target_bo->offset, 344 bo_gem->relocs[j].delta); 345 } 346 } 347} 348 349static inline void 350drm_intel_gem_bo_reference(drm_intel_bo *bo) 351{ 352 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 353 354 atomic_inc(&bo_gem->refcount); 355} 356 357/** 358 * Adds the given buffer to the list of buffers to be validated (moved into the 359 * appropriate memory type) with the next batch submission. 360 * 361 * If a buffer is validated multiple times in a batch submission, it ends up 362 * with the intersection of the memory type flags and the union of the 363 * access flags. 364 */ 365static void 366drm_intel_add_validate_buffer(drm_intel_bo *bo) 367{ 368 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 369 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 370 int index; 371 372 if (bo_gem->validate_index != -1) 373 return; 374 375 /* Extend the array of validation entries as necessary. */ 376 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 377 int new_size = bufmgr_gem->exec_size * 2; 378 379 if (new_size == 0) 380 new_size = 5; 381 382 bufmgr_gem->exec_objects = 383 realloc(bufmgr_gem->exec_objects, 384 sizeof(*bufmgr_gem->exec_objects) * new_size); 385 bufmgr_gem->exec_bos = 386 realloc(bufmgr_gem->exec_bos, 387 sizeof(*bufmgr_gem->exec_bos) * new_size); 388 bufmgr_gem->exec_size = new_size; 389 } 390 391 index = bufmgr_gem->exec_count; 392 bo_gem->validate_index = index; 393 /* Fill in array entry */ 394 bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; 395 bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; 396 bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs; 397 bufmgr_gem->exec_objects[index].alignment = 0; 398 bufmgr_gem->exec_objects[index].offset = 0; 399 bufmgr_gem->exec_bos[index] = bo; 400 bufmgr_gem->exec_count++; 401} 402 403static void 404drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence) 405{ 406 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 407 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 408 int index; 409 410 if (bo_gem->validate_index != -1) { 411 if (need_fence) 412 bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |= 413 EXEC_OBJECT_NEEDS_FENCE; 414 return; 415 } 416 417 /* Extend the array of validation entries as necessary. */ 418 if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { 419 int new_size = bufmgr_gem->exec_size * 2; 420 421 if (new_size == 0) 422 new_size = 5; 423 424 bufmgr_gem->exec2_objects = 425 realloc(bufmgr_gem->exec2_objects, 426 sizeof(*bufmgr_gem->exec2_objects) * new_size); 427 bufmgr_gem->exec_bos = 428 realloc(bufmgr_gem->exec_bos, 429 sizeof(*bufmgr_gem->exec_bos) * new_size); 430 bufmgr_gem->exec_size = new_size; 431 } 432 433 index = bufmgr_gem->exec_count; 434 bo_gem->validate_index = index; 435 /* Fill in array entry */ 436 bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle; 437 bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count; 438 bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; 439 bufmgr_gem->exec2_objects[index].alignment = 0; 440 bufmgr_gem->exec2_objects[index].offset = 0; 441 bufmgr_gem->exec_bos[index] = bo; 442 bufmgr_gem->exec2_objects[index].flags = 0; 443 bufmgr_gem->exec2_objects[index].rsvd1 = 0; 444 bufmgr_gem->exec2_objects[index].rsvd2 = 0; 445 if (need_fence) { 446 bufmgr_gem->exec2_objects[index].flags |= 447 EXEC_OBJECT_NEEDS_FENCE; 448 } 449 bufmgr_gem->exec_count++; 450} 451 452#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ 453 sizeof(uint32_t)) 454 455static void 456drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem, 457 drm_intel_bo_gem *bo_gem) 458{ 459 int size; 460 461 assert(!bo_gem->used_as_reloc_target); 462 463 /* The older chipsets are far-less flexible in terms of tiling, 464 * and require tiled buffer to be size aligned in the aperture. 465 * This means that in the worst possible case we will need a hole 466 * twice as large as the object in order for it to fit into the 467 * aperture. Optimal packing is for wimps. 468 */ 469 size = bo_gem->bo.size; 470 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) 471 size *= 2; 472 473 bo_gem->reloc_tree_size = size; 474} 475 476static int 477drm_intel_setup_reloc_list(drm_intel_bo *bo) 478{ 479 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 480 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 481 unsigned int max_relocs = bufmgr_gem->max_relocs; 482 483 if (bo->size / 4 < max_relocs) 484 max_relocs = bo->size / 4; 485 486 bo_gem->relocs = malloc(max_relocs * 487 sizeof(struct drm_i915_gem_relocation_entry)); 488 bo_gem->reloc_target_info = malloc(max_relocs * 489 sizeof(drm_intel_reloc_target)); 490 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 491 bo_gem->has_error = 1; 492 493 free (bo_gem->relocs); 494 bo_gem->relocs = NULL; 495 496 free (bo_gem->reloc_target_info); 497 bo_gem->reloc_target_info = NULL; 498 499 return 1; 500 } 501 502 return 0; 503} 504 505static int 506drm_intel_gem_bo_busy(drm_intel_bo *bo) 507{ 508 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 509 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 510 struct drm_i915_gem_busy busy; 511 int ret; 512 513 memset(&busy, 0, sizeof(busy)); 514 busy.handle = bo_gem->gem_handle; 515 516 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 517 518 return (ret == 0 && busy.busy); 519} 520 521static int 522drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 523 drm_intel_bo_gem *bo_gem, int state) 524{ 525 struct drm_i915_gem_madvise madv; 526 527 madv.handle = bo_gem->gem_handle; 528 madv.madv = state; 529 madv.retained = 1; 530 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 531 532 return madv.retained; 533} 534 535static int 536drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 537{ 538 return drm_intel_gem_bo_madvise_internal 539 ((drm_intel_bufmgr_gem *) bo->bufmgr, 540 (drm_intel_bo_gem *) bo, 541 madv); 542} 543 544/* drop the oldest entries that have been purged by the kernel */ 545static void 546drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 547 struct drm_intel_gem_bo_bucket *bucket) 548{ 549 while (!DRMLISTEMPTY(&bucket->head)) { 550 drm_intel_bo_gem *bo_gem; 551 552 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 553 bucket->head.next, head); 554 if (drm_intel_gem_bo_madvise_internal 555 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 556 break; 557 558 DRMLISTDEL(&bo_gem->head); 559 drm_intel_gem_bo_free(&bo_gem->bo); 560 } 561} 562 563static drm_intel_bo * 564drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 565 const char *name, 566 unsigned long size, 567 unsigned long flags, 568 uint32_t tiling_mode, 569 unsigned long stride) 570{ 571 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 572 drm_intel_bo_gem *bo_gem; 573 unsigned int page_size = getpagesize(); 574 int ret; 575 struct drm_intel_gem_bo_bucket *bucket; 576 int alloc_from_cache; 577 unsigned long bo_size; 578 int for_render = 0; 579 580 if (flags & BO_ALLOC_FOR_RENDER) 581 for_render = 1; 582 583 /* Round the allocated size up to a power of two number of pages. */ 584 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 585 586 /* If we don't have caching at this size, don't actually round the 587 * allocation up. 588 */ 589 if (bucket == NULL) { 590 bo_size = size; 591 if (bo_size < page_size) 592 bo_size = page_size; 593 } else { 594 bo_size = bucket->size; 595 } 596 597 pthread_mutex_lock(&bufmgr_gem->lock); 598 /* Get a buffer out of the cache if available */ 599retry: 600 alloc_from_cache = 0; 601 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 602 if (for_render) { 603 /* Allocate new render-target BOs from the tail (MRU) 604 * of the list, as it will likely be hot in the GPU 605 * cache and in the aperture for us. 606 */ 607 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 608 bucket->head.prev, head); 609 DRMLISTDEL(&bo_gem->head); 610 alloc_from_cache = 1; 611 } else { 612 /* For non-render-target BOs (where we're probably 613 * going to map it first thing in order to fill it 614 * with data), check if the last BO in the cache is 615 * unbusy, and only reuse in that case. Otherwise, 616 * allocating a new buffer is probably faster than 617 * waiting for the GPU to finish. 618 */ 619 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 620 bucket->head.next, head); 621 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 622 alloc_from_cache = 1; 623 DRMLISTDEL(&bo_gem->head); 624 } 625 } 626 627 if (alloc_from_cache) { 628 if (!drm_intel_gem_bo_madvise_internal 629 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 630 drm_intel_gem_bo_free(&bo_gem->bo); 631 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 632 bucket); 633 goto retry; 634 } 635 636 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 637 tiling_mode, 638 stride)) { 639 drm_intel_gem_bo_free(&bo_gem->bo); 640 goto retry; 641 } 642 } 643 } 644 pthread_mutex_unlock(&bufmgr_gem->lock); 645 646 if (!alloc_from_cache) { 647 struct drm_i915_gem_create create; 648 649 bo_gem = calloc(1, sizeof(*bo_gem)); 650 if (!bo_gem) 651 return NULL; 652 653 bo_gem->bo.size = bo_size; 654 memset(&create, 0, sizeof(create)); 655 create.size = bo_size; 656 657 ret = drmIoctl(bufmgr_gem->fd, 658 DRM_IOCTL_I915_GEM_CREATE, 659 &create); 660 bo_gem->gem_handle = create.handle; 661 bo_gem->bo.handle = bo_gem->gem_handle; 662 if (ret != 0) { 663 free(bo_gem); 664 return NULL; 665 } 666 bo_gem->bo.bufmgr = bufmgr; 667 668 bo_gem->tiling_mode = I915_TILING_NONE; 669 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 670 bo_gem->stride = 0; 671 672 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 673 tiling_mode, 674 stride)) { 675 drm_intel_gem_bo_free(&bo_gem->bo); 676 return NULL; 677 } 678 } 679 680 bo_gem->name = name; 681 atomic_set(&bo_gem->refcount, 1); 682 bo_gem->validate_index = -1; 683 bo_gem->reloc_tree_fences = 0; 684 bo_gem->used_as_reloc_target = 0; 685 bo_gem->has_error = 0; 686 bo_gem->reusable = 1; 687 688 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 689 690 DBG("bo_create: buf %d (%s) %ldb\n", 691 bo_gem->gem_handle, bo_gem->name, size); 692 693 return &bo_gem->bo; 694} 695 696static drm_intel_bo * 697drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 698 const char *name, 699 unsigned long size, 700 unsigned int alignment) 701{ 702 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 703 BO_ALLOC_FOR_RENDER, 704 I915_TILING_NONE, 0); 705} 706 707static drm_intel_bo * 708drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, 709 const char *name, 710 unsigned long size, 711 unsigned int alignment) 712{ 713 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 714 I915_TILING_NONE, 0); 715} 716 717static drm_intel_bo * 718drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 719 int x, int y, int cpp, uint32_t *tiling_mode, 720 unsigned long *pitch, unsigned long flags) 721{ 722 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 723 unsigned long size, stride; 724 uint32_t tiling; 725 726 do { 727 unsigned long aligned_y; 728 729 tiling = *tiling_mode; 730 731 /* If we're tiled, our allocations are in 8 or 32-row blocks, 732 * so failure to align our height means that we won't allocate 733 * enough pages. 734 * 735 * If we're untiled, we still have to align to 2 rows high 736 * because the data port accesses 2x2 blocks even if the 737 * bottom row isn't to be rendered, so failure to align means 738 * we could walk off the end of the GTT and fault. This is 739 * documented on 965, and may be the case on older chipsets 740 * too so we try to be careful. 741 */ 742 aligned_y = y; 743 if (tiling == I915_TILING_NONE) 744 aligned_y = ALIGN(y, 2); 745 else if (tiling == I915_TILING_X) 746 aligned_y = ALIGN(y, 8); 747 else if (tiling == I915_TILING_Y) 748 aligned_y = ALIGN(y, 32); 749 750 stride = x * cpp; 751 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 752 size = stride * aligned_y; 753 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 754 } while (*tiling_mode != tiling); 755 *pitch = stride; 756 757 if (tiling == I915_TILING_NONE) 758 stride = 0; 759 760 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 761 tiling, stride); 762} 763 764/** 765 * Returns a drm_intel_bo wrapping the given buffer object handle. 766 * 767 * This can be used when one application needs to pass a buffer object 768 * to another. 769 */ 770drm_intel_bo * 771drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 772 const char *name, 773 unsigned int handle) 774{ 775 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 776 drm_intel_bo_gem *bo_gem; 777 int ret; 778 struct drm_gem_open open_arg; 779 struct drm_i915_gem_get_tiling get_tiling; 780 781 bo_gem = calloc(1, sizeof(*bo_gem)); 782 if (!bo_gem) 783 return NULL; 784 785 memset(&open_arg, 0, sizeof(open_arg)); 786 open_arg.name = handle; 787 ret = drmIoctl(bufmgr_gem->fd, 788 DRM_IOCTL_GEM_OPEN, 789 &open_arg); 790 if (ret != 0) { 791 DBG("Couldn't reference %s handle 0x%08x: %s\n", 792 name, handle, strerror(errno)); 793 free(bo_gem); 794 return NULL; 795 } 796 bo_gem->bo.size = open_arg.size; 797 bo_gem->bo.offset = 0; 798 bo_gem->bo.virtual = NULL; 799 bo_gem->bo.bufmgr = bufmgr; 800 bo_gem->name = name; 801 atomic_set(&bo_gem->refcount, 1); 802 bo_gem->validate_index = -1; 803 bo_gem->gem_handle = open_arg.handle; 804 bo_gem->global_name = handle; 805 bo_gem->reusable = 0; 806 807 memset(&get_tiling, 0, sizeof(get_tiling)); 808 get_tiling.handle = bo_gem->gem_handle; 809 ret = drmIoctl(bufmgr_gem->fd, 810 DRM_IOCTL_I915_GEM_GET_TILING, 811 &get_tiling); 812 if (ret != 0) { 813 drm_intel_gem_bo_unreference(&bo_gem->bo); 814 return NULL; 815 } 816 bo_gem->tiling_mode = get_tiling.tiling_mode; 817 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 818 /* XXX stride is unknown */ 819 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 820 821 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 822 823 return &bo_gem->bo; 824} 825 826static void 827drm_intel_gem_bo_free(drm_intel_bo *bo) 828{ 829 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 830 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 831 struct drm_gem_close close; 832 int ret; 833 834 if (bo_gem->mem_virtual) 835 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 836 if (bo_gem->gtt_virtual) 837 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 838 839 /* Close this object */ 840 memset(&close, 0, sizeof(close)); 841 close.handle = bo_gem->gem_handle; 842 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 843 if (ret != 0) { 844 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 845 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 846 } 847 free(bo); 848} 849 850/** Frees all cached buffers significantly older than @time. */ 851static void 852drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 853{ 854 int i; 855 856 if (bufmgr_gem->time == time) 857 return; 858 859 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 860 struct drm_intel_gem_bo_bucket *bucket = 861 &bufmgr_gem->cache_bucket[i]; 862 863 while (!DRMLISTEMPTY(&bucket->head)) { 864 drm_intel_bo_gem *bo_gem; 865 866 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 867 bucket->head.next, head); 868 if (time - bo_gem->free_time <= 1) 869 break; 870 871 DRMLISTDEL(&bo_gem->head); 872 873 drm_intel_gem_bo_free(&bo_gem->bo); 874 } 875 } 876 877 bufmgr_gem->time = time; 878} 879 880static void 881drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 882{ 883 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 884 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 885 struct drm_intel_gem_bo_bucket *bucket; 886 int i; 887 888 /* Unreference all the target buffers */ 889 for (i = 0; i < bo_gem->reloc_count; i++) { 890 if (bo_gem->reloc_target_info[i].bo != bo) { 891 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 892 reloc_target_info[i].bo, 893 time); 894 } 895 } 896 bo_gem->reloc_count = 0; 897 bo_gem->used_as_reloc_target = 0; 898 899 DBG("bo_unreference final: %d (%s)\n", 900 bo_gem->gem_handle, bo_gem->name); 901 902 /* release memory associated with this object */ 903 if (bo_gem->reloc_target_info) { 904 free(bo_gem->reloc_target_info); 905 bo_gem->reloc_target_info = NULL; 906 } 907 if (bo_gem->relocs) { 908 free(bo_gem->relocs); 909 bo_gem->relocs = NULL; 910 } 911 912 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 913 /* Put the buffer into our internal cache for reuse if we can. */ 914 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 915 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 916 I915_MADV_DONTNEED)) { 917 bo_gem->free_time = time; 918 919 bo_gem->name = NULL; 920 bo_gem->validate_index = -1; 921 922 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 923 } else { 924 drm_intel_gem_bo_free(bo); 925 } 926} 927 928static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 929 time_t time) 930{ 931 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 932 933 assert(atomic_read(&bo_gem->refcount) > 0); 934 if (atomic_dec_and_test(&bo_gem->refcount)) 935 drm_intel_gem_bo_unreference_final(bo, time); 936} 937 938static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 939{ 940 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 941 942 assert(atomic_read(&bo_gem->refcount) > 0); 943 if (atomic_dec_and_test(&bo_gem->refcount)) { 944 drm_intel_bufmgr_gem *bufmgr_gem = 945 (drm_intel_bufmgr_gem *) bo->bufmgr; 946 struct timespec time; 947 948 clock_gettime(CLOCK_MONOTONIC, &time); 949 950 pthread_mutex_lock(&bufmgr_gem->lock); 951 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 952 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 953 pthread_mutex_unlock(&bufmgr_gem->lock); 954 } 955} 956 957static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 958{ 959 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 960 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 961 struct drm_i915_gem_set_domain set_domain; 962 int ret; 963 964 pthread_mutex_lock(&bufmgr_gem->lock); 965 966 /* Allow recursive mapping. Mesa may recursively map buffers with 967 * nested display loops. 968 */ 969 if (!bo_gem->mem_virtual) { 970 struct drm_i915_gem_mmap mmap_arg; 971 972 DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); 973 974 memset(&mmap_arg, 0, sizeof(mmap_arg)); 975 mmap_arg.handle = bo_gem->gem_handle; 976 mmap_arg.offset = 0; 977 mmap_arg.size = bo->size; 978 ret = drmIoctl(bufmgr_gem->fd, 979 DRM_IOCTL_I915_GEM_MMAP, 980 &mmap_arg); 981 if (ret != 0) { 982 ret = -errno; 983 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 984 __FILE__, __LINE__, bo_gem->gem_handle, 985 bo_gem->name, strerror(errno)); 986 pthread_mutex_unlock(&bufmgr_gem->lock); 987 return ret; 988 } 989 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 990 } 991 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 992 bo_gem->mem_virtual); 993 bo->virtual = bo_gem->mem_virtual; 994 995 set_domain.handle = bo_gem->gem_handle; 996 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 997 if (write_enable) 998 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 999 else 1000 set_domain.write_domain = 0; 1001 ret = drmIoctl(bufmgr_gem->fd, 1002 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1003 &set_domain); 1004 if (ret != 0) { 1005 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1006 __FILE__, __LINE__, bo_gem->gem_handle, 1007 strerror(errno)); 1008 } 1009 1010 pthread_mutex_unlock(&bufmgr_gem->lock); 1011 1012 return 0; 1013} 1014 1015int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1016{ 1017 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1018 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1019 struct drm_i915_gem_set_domain set_domain; 1020 int ret; 1021 1022 pthread_mutex_lock(&bufmgr_gem->lock); 1023 1024 /* Get a mapping of the buffer if we haven't before. */ 1025 if (bo_gem->gtt_virtual == NULL) { 1026 struct drm_i915_gem_mmap_gtt mmap_arg; 1027 1028 DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle, 1029 bo_gem->name); 1030 1031 memset(&mmap_arg, 0, sizeof(mmap_arg)); 1032 mmap_arg.handle = bo_gem->gem_handle; 1033 1034 /* Get the fake offset back... */ 1035 ret = drmIoctl(bufmgr_gem->fd, 1036 DRM_IOCTL_I915_GEM_MMAP_GTT, 1037 &mmap_arg); 1038 if (ret != 0) { 1039 ret = -errno; 1040 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1041 __FILE__, __LINE__, 1042 bo_gem->gem_handle, bo_gem->name, 1043 strerror(errno)); 1044 pthread_mutex_unlock(&bufmgr_gem->lock); 1045 return ret; 1046 } 1047 1048 /* and mmap it */ 1049 bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE, 1050 MAP_SHARED, bufmgr_gem->fd, 1051 mmap_arg.offset); 1052 if (bo_gem->gtt_virtual == MAP_FAILED) { 1053 bo_gem->gtt_virtual = NULL; 1054 ret = -errno; 1055 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1056 __FILE__, __LINE__, 1057 bo_gem->gem_handle, bo_gem->name, 1058 strerror(errno)); 1059 pthread_mutex_unlock(&bufmgr_gem->lock); 1060 return ret; 1061 } 1062 } 1063 1064 bo->virtual = bo_gem->gtt_virtual; 1065 1066 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1067 bo_gem->gtt_virtual); 1068 1069 /* Now move it to the GTT domain so that the CPU caches are flushed */ 1070 set_domain.handle = bo_gem->gem_handle; 1071 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1072 set_domain.write_domain = I915_GEM_DOMAIN_GTT; 1073 ret = drmIoctl(bufmgr_gem->fd, 1074 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1075 &set_domain); 1076 if (ret != 0) { 1077 DBG("%s:%d: Error setting domain %d: %s\n", 1078 __FILE__, __LINE__, bo_gem->gem_handle, 1079 strerror(errno)); 1080 } 1081 1082 pthread_mutex_unlock(&bufmgr_gem->lock); 1083 1084 return 0; 1085} 1086 1087int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1088{ 1089 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1090 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1091 int ret = 0; 1092 1093 if (bo == NULL) 1094 return 0; 1095 1096 assert(bo_gem->gtt_virtual != NULL); 1097 1098 pthread_mutex_lock(&bufmgr_gem->lock); 1099 bo->virtual = NULL; 1100 pthread_mutex_unlock(&bufmgr_gem->lock); 1101 1102 return ret; 1103} 1104 1105static int drm_intel_gem_bo_unmap(drm_intel_bo *bo) 1106{ 1107 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1108 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1109 struct drm_i915_gem_sw_finish sw_finish; 1110 int ret; 1111 1112 if (bo == NULL) 1113 return 0; 1114 1115 assert(bo_gem->mem_virtual != NULL); 1116 1117 pthread_mutex_lock(&bufmgr_gem->lock); 1118 1119 /* Cause a flush to happen if the buffer's pinned for scanout, so the 1120 * results show up in a timely manner. 1121 */ 1122 sw_finish.handle = bo_gem->gem_handle; 1123 ret = drmIoctl(bufmgr_gem->fd, 1124 DRM_IOCTL_I915_GEM_SW_FINISH, 1125 &sw_finish); 1126 ret = ret == -1 ? -errno : 0; 1127 1128 bo->virtual = NULL; 1129 pthread_mutex_unlock(&bufmgr_gem->lock); 1130 1131 return ret; 1132} 1133 1134static int 1135drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1136 unsigned long size, const void *data) 1137{ 1138 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1139 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1140 struct drm_i915_gem_pwrite pwrite; 1141 int ret; 1142 1143 memset(&pwrite, 0, sizeof(pwrite)); 1144 pwrite.handle = bo_gem->gem_handle; 1145 pwrite.offset = offset; 1146 pwrite.size = size; 1147 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1148 ret = drmIoctl(bufmgr_gem->fd, 1149 DRM_IOCTL_I915_GEM_PWRITE, 1150 &pwrite); 1151 if (ret != 0) { 1152 ret = -errno; 1153 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1154 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1155 (int)size, strerror(errno)); 1156 } 1157 1158 return ret; 1159} 1160 1161static int 1162drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1163{ 1164 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1165 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1166 int ret; 1167 1168 get_pipe_from_crtc_id.crtc_id = crtc_id; 1169 ret = drmIoctl(bufmgr_gem->fd, 1170 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1171 &get_pipe_from_crtc_id); 1172 if (ret != 0) { 1173 /* We return -1 here to signal that we don't 1174 * know which pipe is associated with this crtc. 1175 * This lets the caller know that this information 1176 * isn't available; using the wrong pipe for 1177 * vblank waiting can cause the chipset to lock up 1178 */ 1179 return -1; 1180 } 1181 1182 return get_pipe_from_crtc_id.pipe; 1183} 1184 1185static int 1186drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1187 unsigned long size, void *data) 1188{ 1189 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1190 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1191 struct drm_i915_gem_pread pread; 1192 int ret; 1193 1194 memset(&pread, 0, sizeof(pread)); 1195 pread.handle = bo_gem->gem_handle; 1196 pread.offset = offset; 1197 pread.size = size; 1198 pread.data_ptr = (uint64_t) (uintptr_t) data; 1199 ret = drmIoctl(bufmgr_gem->fd, 1200 DRM_IOCTL_I915_GEM_PREAD, 1201 &pread); 1202 if (ret != 0) { 1203 ret = -errno; 1204 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1205 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1206 (int)size, strerror(errno)); 1207 } 1208 1209 return ret; 1210} 1211 1212/** Waits for all GPU rendering to the object to have completed. */ 1213static void 1214drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1215{ 1216 drm_intel_gem_bo_start_gtt_access(bo, 0); 1217} 1218 1219/** 1220 * Sets the object to the GTT read and possibly write domain, used by the X 1221 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt(). 1222 * 1223 * In combination with drm_intel_gem_bo_pin() and manual fence management, we 1224 * can do tiled pixmaps this way. 1225 */ 1226void 1227drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable) 1228{ 1229 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1230 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1231 struct drm_i915_gem_set_domain set_domain; 1232 int ret; 1233 1234 set_domain.handle = bo_gem->gem_handle; 1235 set_domain.read_domains = I915_GEM_DOMAIN_GTT; 1236 set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0; 1237 ret = drmIoctl(bufmgr_gem->fd, 1238 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1239 &set_domain); 1240 if (ret != 0) { 1241 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1242 __FILE__, __LINE__, bo_gem->gem_handle, 1243 set_domain.read_domains, set_domain.write_domain, 1244 strerror(errno)); 1245 } 1246} 1247 1248static void 1249drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1250{ 1251 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1252 int i; 1253 1254 free(bufmgr_gem->exec2_objects); 1255 free(bufmgr_gem->exec_objects); 1256 free(bufmgr_gem->exec_bos); 1257 1258 pthread_mutex_destroy(&bufmgr_gem->lock); 1259 1260 /* Free any cached buffer objects we were going to reuse */ 1261 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1262 struct drm_intel_gem_bo_bucket *bucket = 1263 &bufmgr_gem->cache_bucket[i]; 1264 drm_intel_bo_gem *bo_gem; 1265 1266 while (!DRMLISTEMPTY(&bucket->head)) { 1267 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1268 bucket->head.next, head); 1269 DRMLISTDEL(&bo_gem->head); 1270 1271 drm_intel_gem_bo_free(&bo_gem->bo); 1272 } 1273 } 1274 1275 free(bufmgr); 1276} 1277 1278/** 1279 * Adds the target buffer to the validation list and adds the relocation 1280 * to the reloc_buffer's relocation list. 1281 * 1282 * The relocation entry at the given offset must already contain the 1283 * precomputed relocation value, because the kernel will optimize out 1284 * the relocation entry write when the buffer hasn't moved from the 1285 * last known offset in target_bo. 1286 */ 1287static int 1288do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1289 drm_intel_bo *target_bo, uint32_t target_offset, 1290 uint32_t read_domains, uint32_t write_domain, 1291 int need_fence) 1292{ 1293 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1294 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1295 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1296 1297 if (bo_gem->has_error) 1298 return -ENOMEM; 1299 1300 if (target_bo_gem->has_error) { 1301 bo_gem->has_error = 1; 1302 return -ENOMEM; 1303 } 1304 1305 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1306 need_fence = 0; 1307 1308 /* We never use HW fences for rendering on 965+ */ 1309 if (bufmgr_gem->gen >= 4) 1310 need_fence = 0; 1311 1312 /* Create a new relocation list if needed */ 1313 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1314 return -ENOMEM; 1315 1316 /* Check overflow */ 1317 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1318 1319 /* Check args */ 1320 assert(offset <= bo->size - 4); 1321 assert((write_domain & (write_domain - 1)) == 0); 1322 1323 /* Make sure that we're not adding a reloc to something whose size has 1324 * already been accounted for. 1325 */ 1326 assert(!bo_gem->used_as_reloc_target); 1327 if (target_bo_gem != bo_gem) { 1328 target_bo_gem->used_as_reloc_target = 1; 1329 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1330 } 1331 /* An object needing a fence is a tiled buffer, so it won't have 1332 * relocs to other buffers. 1333 */ 1334 if (need_fence) 1335 target_bo_gem->reloc_tree_fences = 1; 1336 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1337 1338 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1339 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1340 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1341 target_bo_gem->gem_handle; 1342 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1343 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1344 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; 1345 1346 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1347 if (target_bo != bo) 1348 drm_intel_gem_bo_reference(target_bo); 1349 if (need_fence) 1350 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1351 DRM_INTEL_RELOC_FENCE; 1352 else 1353 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1354 1355 bo_gem->reloc_count++; 1356 1357 return 0; 1358} 1359 1360static int 1361drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1362 drm_intel_bo *target_bo, uint32_t target_offset, 1363 uint32_t read_domains, uint32_t write_domain) 1364{ 1365 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1366 1367 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1368 read_domains, write_domain, 1369 !bufmgr_gem->fenced_relocs); 1370} 1371 1372static int 1373drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1374 drm_intel_bo *target_bo, 1375 uint32_t target_offset, 1376 uint32_t read_domains, uint32_t write_domain) 1377{ 1378 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1379 read_domains, write_domain, 1); 1380} 1381 1382/** 1383 * Walk the tree of relocations rooted at BO and accumulate the list of 1384 * validations to be performed and update the relocation buffers with 1385 * index values into the validation list. 1386 */ 1387static void 1388drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1389{ 1390 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1391 int i; 1392 1393 if (bo_gem->relocs == NULL) 1394 return; 1395 1396 for (i = 0; i < bo_gem->reloc_count; i++) { 1397 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1398 1399 if (target_bo == bo) 1400 continue; 1401 1402 /* Continue walking the tree depth-first. */ 1403 drm_intel_gem_bo_process_reloc(target_bo); 1404 1405 /* Add the target to the validate list */ 1406 drm_intel_add_validate_buffer(target_bo); 1407 } 1408} 1409 1410static void 1411drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1412{ 1413 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1414 int i; 1415 1416 if (bo_gem->relocs == NULL) 1417 return; 1418 1419 for (i = 0; i < bo_gem->reloc_count; i++) { 1420 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1421 int need_fence; 1422 1423 if (target_bo == bo) 1424 continue; 1425 1426 /* Continue walking the tree depth-first. */ 1427 drm_intel_gem_bo_process_reloc2(target_bo); 1428 1429 need_fence = (bo_gem->reloc_target_info[i].flags & 1430 DRM_INTEL_RELOC_FENCE); 1431 1432 /* Add the target to the validate list */ 1433 drm_intel_add_validate_buffer2(target_bo, need_fence); 1434 } 1435} 1436 1437 1438static void 1439drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1440{ 1441 int i; 1442 1443 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1444 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1445 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1446 1447 /* Update the buffer offset */ 1448 if (bufmgr_gem->exec_objects[i].offset != bo->offset) { 1449 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1450 bo_gem->gem_handle, bo_gem->name, bo->offset, 1451 (unsigned long long)bufmgr_gem->exec_objects[i]. 1452 offset); 1453 bo->offset = bufmgr_gem->exec_objects[i].offset; 1454 } 1455 } 1456} 1457 1458static void 1459drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1460{ 1461 int i; 1462 1463 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1464 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1465 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1466 1467 /* Update the buffer offset */ 1468 if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { 1469 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1470 bo_gem->gem_handle, bo_gem->name, bo->offset, 1471 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1472 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1473 } 1474 } 1475} 1476 1477static int 1478drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 1479 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 1480{ 1481 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1482 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1483 struct drm_i915_gem_execbuffer execbuf; 1484 int ret, i; 1485 1486 if (bo_gem->has_error) 1487 return -ENOMEM; 1488 1489 pthread_mutex_lock(&bufmgr_gem->lock); 1490 /* Update indices and set up the validate list. */ 1491 drm_intel_gem_bo_process_reloc(bo); 1492 1493 /* Add the batch buffer to the validation list. There are no 1494 * relocations pointing to it. 1495 */ 1496 drm_intel_add_validate_buffer(bo); 1497 1498 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 1499 execbuf.buffer_count = bufmgr_gem->exec_count; 1500 execbuf.batch_start_offset = 0; 1501 execbuf.batch_len = used; 1502 execbuf.cliprects_ptr = (uintptr_t) cliprects; 1503 execbuf.num_cliprects = num_cliprects; 1504 execbuf.DR1 = 0; 1505 execbuf.DR4 = DR4; 1506 1507 ret = drmIoctl(bufmgr_gem->fd, 1508 DRM_IOCTL_I915_GEM_EXECBUFFER, 1509 &execbuf); 1510 if (ret != 0) { 1511 ret = -errno; 1512 if (errno == ENOSPC) { 1513 DBG("Execbuffer fails to pin. " 1514 "Estimate: %u. Actual: %u. Available: %u\n", 1515 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1516 bufmgr_gem-> 1517 exec_count), 1518 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1519 bufmgr_gem-> 1520 exec_count), 1521 (unsigned int)bufmgr_gem->gtt_size); 1522 } 1523 } 1524 drm_intel_update_buffer_offsets(bufmgr_gem); 1525 1526 if (bufmgr_gem->bufmgr.debug) 1527 drm_intel_gem_dump_validation_list(bufmgr_gem); 1528 1529 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1530 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1531 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1532 1533 /* Disconnect the buffer from the validate list */ 1534 bo_gem->validate_index = -1; 1535 bufmgr_gem->exec_bos[i] = NULL; 1536 } 1537 bufmgr_gem->exec_count = 0; 1538 pthread_mutex_unlock(&bufmgr_gem->lock); 1539 1540 return ret; 1541} 1542 1543static int 1544drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 1545 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 1546 int ring_flag) 1547{ 1548 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1549 struct drm_i915_gem_execbuffer2 execbuf; 1550 int ret, i; 1551 1552 switch (ring_flag) { 1553 default: 1554 return -EINVAL; 1555 case I915_EXEC_BLT: 1556 if (!bufmgr_gem->has_blt) 1557 return -EINVAL; 1558 break; 1559 case I915_EXEC_BSD: 1560 if (!bufmgr_gem->has_bsd) 1561 return -EINVAL; 1562 break; 1563 case I915_EXEC_RENDER: 1564 case I915_EXEC_DEFAULT: 1565 break; 1566 } 1567 1568 pthread_mutex_lock(&bufmgr_gem->lock); 1569 /* Update indices and set up the validate list. */ 1570 drm_intel_gem_bo_process_reloc2(bo); 1571 1572 /* Add the batch buffer to the validation list. There are no relocations 1573 * pointing to it. 1574 */ 1575 drm_intel_add_validate_buffer2(bo, 0); 1576 1577 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 1578 execbuf.buffer_count = bufmgr_gem->exec_count; 1579 execbuf.batch_start_offset = 0; 1580 execbuf.batch_len = used; 1581 execbuf.cliprects_ptr = (uintptr_t)cliprects; 1582 execbuf.num_cliprects = num_cliprects; 1583 execbuf.DR1 = 0; 1584 execbuf.DR4 = DR4; 1585 execbuf.flags = ring_flag; 1586 execbuf.rsvd1 = 0; 1587 execbuf.rsvd2 = 0; 1588 1589 ret = drmIoctl(bufmgr_gem->fd, 1590 DRM_IOCTL_I915_GEM_EXECBUFFER2, 1591 &execbuf); 1592 if (ret != 0) { 1593 ret = -errno; 1594 if (ret == -ENOSPC) { 1595 DBG("Execbuffer fails to pin. " 1596 "Estimate: %u. Actual: %u. Available: %u\n", 1597 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 1598 bufmgr_gem->exec_count), 1599 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 1600 bufmgr_gem->exec_count), 1601 (unsigned int) bufmgr_gem->gtt_size); 1602 } 1603 } 1604 drm_intel_update_buffer_offsets2(bufmgr_gem); 1605 1606 if (bufmgr_gem->bufmgr.debug) 1607 drm_intel_gem_dump_validation_list(bufmgr_gem); 1608 1609 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1610 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1611 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1612 1613 /* Disconnect the buffer from the validate list */ 1614 bo_gem->validate_index = -1; 1615 bufmgr_gem->exec_bos[i] = NULL; 1616 } 1617 bufmgr_gem->exec_count = 0; 1618 pthread_mutex_unlock(&bufmgr_gem->lock); 1619 1620 return ret; 1621} 1622 1623static int 1624drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 1625 drm_clip_rect_t *cliprects, int num_cliprects, 1626 int DR4) 1627{ 1628 return drm_intel_gem_bo_mrb_exec2(bo, used, 1629 cliprects, num_cliprects, DR4, 1630 I915_EXEC_RENDER); 1631} 1632 1633static int 1634drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 1635{ 1636 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1637 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1638 struct drm_i915_gem_pin pin; 1639 int ret; 1640 1641 memset(&pin, 0, sizeof(pin)); 1642 pin.handle = bo_gem->gem_handle; 1643 pin.alignment = alignment; 1644 1645 ret = drmIoctl(bufmgr_gem->fd, 1646 DRM_IOCTL_I915_GEM_PIN, 1647 &pin); 1648 if (ret != 0) 1649 return -errno; 1650 1651 bo->offset = pin.offset; 1652 return 0; 1653} 1654 1655static int 1656drm_intel_gem_bo_unpin(drm_intel_bo *bo) 1657{ 1658 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1659 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1660 struct drm_i915_gem_unpin unpin; 1661 int ret; 1662 1663 memset(&unpin, 0, sizeof(unpin)); 1664 unpin.handle = bo_gem->gem_handle; 1665 1666 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 1667 if (ret != 0) 1668 return -errno; 1669 1670 return 0; 1671} 1672 1673static int 1674drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 1675 uint32_t tiling_mode, 1676 uint32_t stride) 1677{ 1678 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1679 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1680 struct drm_i915_gem_set_tiling set_tiling; 1681 int ret; 1682 1683 if (bo_gem->global_name == 0 && 1684 tiling_mode == bo_gem->tiling_mode && 1685 stride == bo_gem->stride) 1686 return 0; 1687 1688 memset(&set_tiling, 0, sizeof(set_tiling)); 1689 do { 1690 /* set_tiling is slightly broken and overwrites the 1691 * input on the error path, so we have to open code 1692 * rmIoctl. 1693 */ 1694 set_tiling.handle = bo_gem->gem_handle; 1695 set_tiling.tiling_mode = tiling_mode; 1696 set_tiling.stride = stride; 1697 1698 ret = ioctl(bufmgr_gem->fd, 1699 DRM_IOCTL_I915_GEM_SET_TILING, 1700 &set_tiling); 1701 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 1702 if (ret == -1) 1703 return -errno; 1704 1705 bo_gem->tiling_mode = set_tiling.tiling_mode; 1706 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 1707 bo_gem->stride = set_tiling.stride; 1708 return 0; 1709} 1710 1711static int 1712drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1713 uint32_t stride) 1714{ 1715 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1716 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1717 int ret; 1718 1719 /* Linear buffers have no stride. By ensuring that we only ever use 1720 * stride 0 with linear buffers, we simplify our code. 1721 */ 1722 if (*tiling_mode == I915_TILING_NONE) 1723 stride = 0; 1724 1725 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 1726 if (ret == 0) 1727 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 1728 1729 *tiling_mode = bo_gem->tiling_mode; 1730 return ret; 1731} 1732 1733static int 1734drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 1735 uint32_t * swizzle_mode) 1736{ 1737 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1738 1739 *tiling_mode = bo_gem->tiling_mode; 1740 *swizzle_mode = bo_gem->swizzle_mode; 1741 return 0; 1742} 1743 1744static int 1745drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 1746{ 1747 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1748 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1749 struct drm_gem_flink flink; 1750 int ret; 1751 1752 if (!bo_gem->global_name) { 1753 memset(&flink, 0, sizeof(flink)); 1754 flink.handle = bo_gem->gem_handle; 1755 1756 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 1757 if (ret != 0) 1758 return -errno; 1759 bo_gem->global_name = flink.name; 1760 bo_gem->reusable = 0; 1761 } 1762 1763 *name = bo_gem->global_name; 1764 return 0; 1765} 1766 1767/** 1768 * Enables unlimited caching of buffer objects for reuse. 1769 * 1770 * This is potentially very memory expensive, as the cache at each bucket 1771 * size is only bounded by how many buffers of that size we've managed to have 1772 * in flight at once. 1773 */ 1774void 1775drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 1776{ 1777 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1778 1779 bufmgr_gem->bo_reuse = 1; 1780} 1781 1782/** 1783 * Enable use of fenced reloc type. 1784 * 1785 * New code should enable this to avoid unnecessary fence register 1786 * allocation. If this option is not enabled, all relocs will have fence 1787 * register allocated. 1788 */ 1789void 1790drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 1791{ 1792 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 1793 1794 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 1795 bufmgr_gem->fenced_relocs = 1; 1796} 1797 1798/** 1799 * Return the additional aperture space required by the tree of buffer objects 1800 * rooted at bo. 1801 */ 1802static int 1803drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 1804{ 1805 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1806 int i; 1807 int total = 0; 1808 1809 if (bo == NULL || bo_gem->included_in_check_aperture) 1810 return 0; 1811 1812 total += bo->size; 1813 bo_gem->included_in_check_aperture = 1; 1814 1815 for (i = 0; i < bo_gem->reloc_count; i++) 1816 total += 1817 drm_intel_gem_bo_get_aperture_space(bo_gem-> 1818 reloc_target_info[i].bo); 1819 1820 return total; 1821} 1822 1823/** 1824 * Count the number of buffers in this list that need a fence reg 1825 * 1826 * If the count is greater than the number of available regs, we'll have 1827 * to ask the caller to resubmit a batch with fewer tiled buffers. 1828 * 1829 * This function over-counts if the same buffer is used multiple times. 1830 */ 1831static unsigned int 1832drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 1833{ 1834 int i; 1835 unsigned int total = 0; 1836 1837 for (i = 0; i < count; i++) { 1838 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1839 1840 if (bo_gem == NULL) 1841 continue; 1842 1843 total += bo_gem->reloc_tree_fences; 1844 } 1845 return total; 1846} 1847 1848/** 1849 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 1850 * for the next drm_intel_bufmgr_check_aperture_space() call. 1851 */ 1852static void 1853drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 1854{ 1855 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1856 int i; 1857 1858 if (bo == NULL || !bo_gem->included_in_check_aperture) 1859 return; 1860 1861 bo_gem->included_in_check_aperture = 0; 1862 1863 for (i = 0; i < bo_gem->reloc_count; i++) 1864 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 1865 reloc_target_info[i].bo); 1866} 1867 1868/** 1869 * Return a conservative estimate for the amount of aperture required 1870 * for a collection of buffers. This may double-count some buffers. 1871 */ 1872static unsigned int 1873drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 1874{ 1875 int i; 1876 unsigned int total = 0; 1877 1878 for (i = 0; i < count; i++) { 1879 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 1880 if (bo_gem != NULL) 1881 total += bo_gem->reloc_tree_size; 1882 } 1883 return total; 1884} 1885 1886/** 1887 * Return the amount of aperture needed for a collection of buffers. 1888 * This avoids double counting any buffers, at the cost of looking 1889 * at every buffer in the set. 1890 */ 1891static unsigned int 1892drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 1893{ 1894 int i; 1895 unsigned int total = 0; 1896 1897 for (i = 0; i < count; i++) { 1898 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 1899 /* For the first buffer object in the array, we get an 1900 * accurate count back for its reloc_tree size (since nothing 1901 * had been flagged as being counted yet). We can save that 1902 * value out as a more conservative reloc_tree_size that 1903 * avoids double-counting target buffers. Since the first 1904 * buffer happens to usually be the batch buffer in our 1905 * callers, this can pull us back from doing the tree 1906 * walk on every new batch emit. 1907 */ 1908 if (i == 0) { 1909 drm_intel_bo_gem *bo_gem = 1910 (drm_intel_bo_gem *) bo_array[i]; 1911 bo_gem->reloc_tree_size = total; 1912 } 1913 } 1914 1915 for (i = 0; i < count; i++) 1916 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 1917 return total; 1918} 1919 1920/** 1921 * Return -1 if the batchbuffer should be flushed before attempting to 1922 * emit rendering referencing the buffers pointed to by bo_array. 1923 * 1924 * This is required because if we try to emit a batchbuffer with relocations 1925 * to a tree of buffers that won't simultaneously fit in the aperture, 1926 * the rendering will return an error at a point where the software is not 1927 * prepared to recover from it. 1928 * 1929 * However, we also want to emit the batchbuffer significantly before we reach 1930 * the limit, as a series of batchbuffers each of which references buffers 1931 * covering almost all of the aperture means that at each emit we end up 1932 * waiting to evict a buffer from the last rendering, and we get synchronous 1933 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 1934 * get better parallelism. 1935 */ 1936static int 1937drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 1938{ 1939 drm_intel_bufmgr_gem *bufmgr_gem = 1940 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 1941 unsigned int total = 0; 1942 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 1943 int total_fences; 1944 1945 /* Check for fence reg constraints if necessary */ 1946 if (bufmgr_gem->available_fences) { 1947 total_fences = drm_intel_gem_total_fences(bo_array, count); 1948 if (total_fences > bufmgr_gem->available_fences) 1949 return -ENOSPC; 1950 } 1951 1952 total = drm_intel_gem_estimate_batch_space(bo_array, count); 1953 1954 if (total > threshold) 1955 total = drm_intel_gem_compute_batch_space(bo_array, count); 1956 1957 if (total > threshold) { 1958 DBG("check_space: overflowed available aperture, " 1959 "%dkb vs %dkb\n", 1960 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 1961 return -ENOSPC; 1962 } else { 1963 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 1964 (int)bufmgr_gem->gtt_size / 1024); 1965 return 0; 1966 } 1967} 1968 1969/* 1970 * Disable buffer reuse for objects which are shared with the kernel 1971 * as scanout buffers 1972 */ 1973static int 1974drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 1975{ 1976 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1977 1978 bo_gem->reusable = 0; 1979 return 0; 1980} 1981 1982static int 1983drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 1984{ 1985 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1986 1987 return bo_gem->reusable; 1988} 1989 1990static int 1991_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 1992{ 1993 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1994 int i; 1995 1996 for (i = 0; i < bo_gem->reloc_count; i++) { 1997 if (bo_gem->reloc_target_info[i].bo == target_bo) 1998 return 1; 1999 if (bo == bo_gem->reloc_target_info[i].bo) 2000 continue; 2001 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 2002 target_bo)) 2003 return 1; 2004 } 2005 2006 return 0; 2007} 2008 2009/** Return true if target_bo is referenced by bo's relocation tree. */ 2010static int 2011drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2012{ 2013 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2014 2015 if (bo == NULL || target_bo == NULL) 2016 return 0; 2017 if (target_bo_gem->used_as_reloc_target) 2018 return _drm_intel_gem_bo_references(bo, target_bo); 2019 return 0; 2020} 2021 2022static void 2023add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 2024{ 2025 unsigned int i = bufmgr_gem->num_buckets; 2026 2027 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 2028 2029 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 2030 bufmgr_gem->cache_bucket[i].size = size; 2031 bufmgr_gem->num_buckets++; 2032} 2033 2034static void 2035init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 2036{ 2037 unsigned long size, cache_max_size = 64 * 1024 * 1024; 2038 2039 /* OK, so power of two buckets was too wasteful of memory. 2040 * Give 3 other sizes between each power of two, to hopefully 2041 * cover things accurately enough. (The alternative is 2042 * probably to just go for exact matching of sizes, and assume 2043 * that for things like composited window resize the tiled 2044 * width/height alignment and rounding of sizes to pages will 2045 * get us useful cache hit rates anyway) 2046 */ 2047 add_bucket(bufmgr_gem, 4096); 2048 add_bucket(bufmgr_gem, 4096 * 2); 2049 add_bucket(bufmgr_gem, 4096 * 3); 2050 2051 /* Initialize the linked lists for BO reuse cache. */ 2052 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 2053 add_bucket(bufmgr_gem, size); 2054 2055 add_bucket(bufmgr_gem, size + size * 1 / 4); 2056 add_bucket(bufmgr_gem, size + size * 2 / 4); 2057 add_bucket(bufmgr_gem, size + size * 3 / 4); 2058 } 2059} 2060 2061/** 2062 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 2063 * and manage map buffer objections. 2064 * 2065 * \param fd File descriptor of the opened DRM device. 2066 */ 2067drm_intel_bufmgr * 2068drm_intel_bufmgr_gem_init(int fd, int batch_size) 2069{ 2070 drm_intel_bufmgr_gem *bufmgr_gem; 2071 struct drm_i915_gem_get_aperture aperture; 2072 drm_i915_getparam_t gp; 2073 int ret; 2074 int exec2 = 0; 2075 2076 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 2077 if (bufmgr_gem == NULL) 2078 return NULL; 2079 2080 bufmgr_gem->fd = fd; 2081 2082 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 2083 free(bufmgr_gem); 2084 return NULL; 2085 } 2086 2087 ret = drmIoctl(bufmgr_gem->fd, 2088 DRM_IOCTL_I915_GEM_GET_APERTURE, 2089 &aperture); 2090 2091 if (ret == 0) 2092 bufmgr_gem->gtt_size = aperture.aper_available_size; 2093 else { 2094 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 2095 strerror(errno)); 2096 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 2097 fprintf(stderr, "Assuming %dkB available aperture size.\n" 2098 "May lead to reduced performance or incorrect " 2099 "rendering.\n", 2100 (int)bufmgr_gem->gtt_size / 1024); 2101 } 2102 2103 gp.param = I915_PARAM_CHIPSET_ID; 2104 gp.value = &bufmgr_gem->pci_device; 2105 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2106 if (ret) { 2107 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 2108 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 2109 } 2110 2111 if (IS_GEN2(bufmgr_gem)) 2112 bufmgr_gem->gen = 2; 2113 else if (IS_GEN3(bufmgr_gem)) 2114 bufmgr_gem->gen = 3; 2115 else if (IS_GEN4(bufmgr_gem)) 2116 bufmgr_gem->gen = 4; 2117 else 2118 bufmgr_gem->gen = 6; 2119 2120 gp.param = I915_PARAM_HAS_EXECBUF2; 2121 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2122 if (!ret) 2123 exec2 = 1; 2124 2125 gp.param = I915_PARAM_HAS_BSD; 2126 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2127 bufmgr_gem->has_bsd = ret == 0; 2128 2129 gp.param = I915_PARAM_HAS_BLT; 2130 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2131 bufmgr_gem->has_blt = ret == 0; 2132 2133 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 2134 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2135 bufmgr_gem->has_relaxed_fencing = ret == 0; 2136 2137 if (bufmgr_gem->gen < 4) { 2138 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 2139 gp.value = &bufmgr_gem->available_fences; 2140 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2141 if (ret) { 2142 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 2143 errno); 2144 fprintf(stderr, "param: %d, val: %d\n", gp.param, 2145 *gp.value); 2146 bufmgr_gem->available_fences = 0; 2147 } else { 2148 /* XXX The kernel reports the total number of fences, 2149 * including any that may be pinned. 2150 * 2151 * We presume that there will be at least one pinned 2152 * fence for the scanout buffer, but there may be more 2153 * than one scanout and the user may be manually 2154 * pinning buffers. Let's move to execbuffer2 and 2155 * thereby forget the insanity of using fences... 2156 */ 2157 bufmgr_gem->available_fences -= 2; 2158 if (bufmgr_gem->available_fences < 0) 2159 bufmgr_gem->available_fences = 0; 2160 } 2161 } 2162 2163 /* Let's go with one relocation per every 2 dwords (but round down a bit 2164 * since a power of two will mean an extra page allocation for the reloc 2165 * buffer). 2166 * 2167 * Every 4 was too few for the blender benchmark. 2168 */ 2169 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 2170 2171 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 2172 bufmgr_gem->bufmgr.bo_alloc_for_render = 2173 drm_intel_gem_bo_alloc_for_render; 2174 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 2175 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 2176 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 2177 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 2178 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 2179 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 2180 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 2181 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 2182 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 2183 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 2184 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 2185 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 2186 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 2187 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 2188 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 2189 /* Use the new one if available */ 2190 if (exec2) { 2191 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 2192 if (bufmgr_gem->has_bsd|bufmgr_gem->has_blt) 2193 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 2194 } else 2195 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 2196 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 2197 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 2198 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; 2199 bufmgr_gem->bufmgr.debug = 0; 2200 bufmgr_gem->bufmgr.check_aperture_space = 2201 drm_intel_gem_check_aperture_space; 2202 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 2203 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 2204 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 2205 drm_intel_gem_get_pipe_from_crtc_id; 2206 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 2207 2208 init_cache_buckets(bufmgr_gem); 2209 2210 return &bufmgr_gem->bufmgr; 2211} 2212