intel_bufmgr_gem.c revision 6335e1d28c422050024bcf4100c4fb3a5bac2afb
/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *          Eric Anholt <eric@anholt.net>
 *          Dave Airlie <airlied@linux.ie>
 */

#ifdef HAVE_CONFIG_H
#include "config.h"
#endif

#include <xf86drm.h>
#include <xf86atomic.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <stdbool.h>

#include "errno.h"
#ifndef ETIME
#define ETIME ETIMEDOUT
#endif
#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
#include "intel_aub.h"
#include "string.h"

#include "i915_drm.h"

#ifdef HAVE_VALGRIND
#include <valgrind.h>
#include <memcheck.h>
#define VG(x) x
#else
#define VG(x)
#endif

#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))

#define DBG(...) \
do { \
	if (bufmgr_gem->bufmgr.debug) \
		fprintf(stderr, __VA_ARGS__); \
} while (0)

#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

typedef struct _drm_intel_bo_gem drm_intel_bo_gem;

struct drm_intel_gem_bo_bucket {
	drmMMListHead head;
	unsigned long size;
};

typedef struct _drm_intel_bufmgr_gem {
	drm_intel_bufmgr bufmgr;

	int fd;

	int max_relocs;

	pthread_mutex_t lock;

	struct drm_i915_gem_exec_object *exec_objects;
	struct drm_i915_gem_exec_object2 *exec2_objects;
	drm_intel_bo **exec_bos;
	int exec_size;
	int exec_count;

	/** Array of lists of cached gem objects of power-of-two sizes */
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
	int num_buckets;
	time_t time;

	drmMMListHead named;
	drmMMListHead vma_cache;
	int vma_count, vma_open, vma_max;

	uint64_t gtt_size;
	int available_fences;
	int pci_device;
	int gen;
	unsigned int has_bsd : 1;
	unsigned int has_blt : 1;
	unsigned int has_relaxed_fencing : 1;
	unsigned int has_llc : 1;
	unsigned int has_wait_timeout : 1;
	unsigned int bo_reuse : 1;
	unsigned int no_exec : 1;
	unsigned int has_vebox : 1;
	bool fenced_relocs;

	char *aub_filename;
	FILE *aub_file;
	uint32_t aub_offset;
} drm_intel_bufmgr_gem;

#define DRM_INTEL_RELOC_FENCE (1<<0)

typedef struct _drm_intel_reloc_target_info {
	drm_intel_bo *bo;
	int flags;
} drm_intel_reloc_target;

struct _drm_intel_bo_gem {
	drm_intel_bo bo;

	atomic_t refcount;
	uint32_t gem_handle;
	const char *name;

	/**
	 * Kernel-assigned global name for this object
	 */
	unsigned int global_name;
	drmMMListHead name_list;

	/**
	 * Index of the buffer within the validation list while preparing a
	 * batchbuffer execution.
	 */
	int validate_index;

	/**
	 * Current tiling mode
	 */
	uint32_t tiling_mode;
	uint32_t swizzle_mode;
	unsigned long stride;

	time_t free_time;

	/** Array passed to the DRM containing relocation information. */
	struct drm_i915_gem_relocation_entry *relocs;
	/**
	 * Array of info structs corresponding to relocs[i].target_handle etc
	 */
	drm_intel_reloc_target *reloc_target_info;
	/** Number of entries in relocs */
	int reloc_count;
	/** Mapped address for the buffer, saved across map/unmap cycles */
	void *mem_virtual;
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
	void *gtt_virtual;
	int map_count;
	drmMMListHead vma_list;

	/** BO cache list */
	drmMMListHead head;

	/**
	 * Boolean of whether this BO and its children have been included in
	 * the current drm_intel_bufmgr_check_aperture_space() total.
	 */
	bool included_in_check_aperture;

	/**
	 * Boolean of whether this buffer has been used as a relocation
	 * target and had its size accounted for, and thus can't have any
	 * further relocations added to it.
	 */
	bool used_as_reloc_target;

	/**
	 * Boolean of whether we have encountered an error whilst building
	 * the relocation tree.
	 */
	bool has_error;

	/**
	 * Boolean of whether this buffer can be re-used
	 */
	bool reusable;

	/**
	 * Size in bytes of this buffer and its relocation descendants.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
	int reloc_tree_size;

	/**
	 * Number of potential fence registers required by this buffer and its
	 * relocations.
	 */
	int reloc_tree_fences;

	/** Whether we need to call the SW_FINISH ioctl on unmap. */
	bool mapped_cpu_write;

	uint32_t aub_offset;

	drm_intel_aub_annotation *aub_annotations;
	unsigned aub_annotation_count;
};

static unsigned int
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);

static unsigned int
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);

static int
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
			    uint32_t * swizzle_mode);

static int
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
				     uint32_t tiling_mode,
				     uint32_t stride);

static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
						      time_t time);

static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);

static void drm_intel_gem_bo_free(drm_intel_bo *bo);

static unsigned long
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
			   uint32_t *tiling_mode)
{
	unsigned long min_size, max_size;
	unsigned long i;

	if (*tiling_mode == I915_TILING_NONE)
		return size;

	/* 965+ just need multiples of page size for tiling */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(size, 4096);

	/* Older chips need powers of two, of at least 512k or 1M */
	if (bufmgr_gem->gen == 3) {
		min_size = 1024*1024;
		max_size = 128*1024*1024;
	} else {
		min_size = 512*1024;
		max_size = 64*1024*1024;
	}

	if (size > max_size) {
		*tiling_mode = I915_TILING_NONE;
		return size;
	}

	/* Do we need to allocate every page for the fence? */
	if (bufmgr_gem->has_relaxed_fencing)
		return ROUND_UP_TO(size, 4096);

	for (i = min_size; i < size; i <<= 1)
		;

	return i;
}

/*
 * Round a given pitch up to the minimum required for X tiling on a
 * given chip.  We use 512 as the minimum to allow for a later tiling
 * change.
 */
static unsigned long
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
			    unsigned long pitch, uint32_t *tiling_mode)
{
	unsigned long tile_width;
	unsigned long i;

	/* If untiled, then just align it so that we can do rendering
	 * to it with the 3D engine.
	 */
	if (*tiling_mode == I915_TILING_NONE)
		return ALIGN(pitch, 64);

	if (*tiling_mode == I915_TILING_X
			|| (IS_915(bufmgr_gem->pci_device)
			    && *tiling_mode == I915_TILING_Y))
		tile_width = 512;
	else
		tile_width = 128;

	/* 965 is flexible */
	if (bufmgr_gem->gen >= 4)
		return ROUND_UP_TO(pitch, tile_width);

	/* The older hardware has a maximum pitch of 8192 with tiled
	 * surfaces, so fall back to untiled if it's too large.
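	 *
	 * (Illustrative example, not from the original comment: with X tiling
	 * and tile_width = 512, a 1300-byte pitch becomes 1536 on gen4+,
	 * the next multiple of the tile width, but 2048 on older parts,
	 * which need a power-of-two pitch.)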
326 */ 327 if (pitch > 8192) { 328 *tiling_mode = I915_TILING_NONE; 329 return ALIGN(pitch, 64); 330 } 331 332 /* Pre-965 needs power of two tile width */ 333 for (i = tile_width; i < pitch; i <<= 1) 334 ; 335 336 return i; 337} 338 339static struct drm_intel_gem_bo_bucket * 340drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem, 341 unsigned long size) 342{ 343 int i; 344 345 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 346 struct drm_intel_gem_bo_bucket *bucket = 347 &bufmgr_gem->cache_bucket[i]; 348 if (bucket->size >= size) { 349 return bucket; 350 } 351 } 352 353 return NULL; 354} 355 356static void 357drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem) 358{ 359 int i, j; 360 361 for (i = 0; i < bufmgr_gem->exec_count; i++) { 362 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 363 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 364 365 if (bo_gem->relocs == NULL) { 366 DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, 367 bo_gem->name); 368 continue; 369 } 370 371 for (j = 0; j < bo_gem->reloc_count; j++) { 372 drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo; 373 drm_intel_bo_gem *target_gem = 374 (drm_intel_bo_gem *) target_bo; 375 376 DBG("%2d: %d (%s)@0x%08llx -> " 377 "%d (%s)@0x%08lx + 0x%08x\n", 378 i, 379 bo_gem->gem_handle, bo_gem->name, 380 (unsigned long long)bo_gem->relocs[j].offset, 381 target_gem->gem_handle, 382 target_gem->name, 383 target_bo->offset, 384 bo_gem->relocs[j].delta); 385 } 386 } 387} 388 389static inline void 390drm_intel_gem_bo_reference(drm_intel_bo *bo) 391{ 392 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 393 394 atomic_inc(&bo_gem->refcount); 395} 396 397/** 398 * Adds the given buffer to the list of buffers to be validated (moved into the 399 * appropriate memory type) with the next batch submission. 400 * 401 * If a buffer is validated multiple times in a batch submission, it ends up 402 * with the intersection of the memory type flags and the union of the 403 * access flags. 404 */ 405static void 406drm_intel_add_validate_buffer(drm_intel_bo *bo) 407{ 408 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 409 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 410 int index; 411 412 if (bo_gem->validate_index != -1) 413 return; 414 415 /* Extend the array of validation entries as necessary. 
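	 * (The exec arrays grow geometrically, 0, 5, 10, 20, ... entries,
	 * so repeated additions stay cheap even for large batches.)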
 */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec_objects =
		    realloc(bufmgr_gem->exec_objects,
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
		bufmgr_gem->exec_bos =
		    realloc(bufmgr_gem->exec_bos,
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
	bufmgr_gem->exec_objects[index].alignment = 0;
	bufmgr_gem->exec_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec_count++;
}

static void
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
	int index;

	if (bo_gem->validate_index != -1) {
		if (need_fence)
			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
				EXEC_OBJECT_NEEDS_FENCE;
		return;
	}

	/* Extend the array of validation entries as necessary. */
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
		int new_size = bufmgr_gem->exec_size * 2;

		if (new_size == 0)
			new_size = 5;

		bufmgr_gem->exec2_objects =
			realloc(bufmgr_gem->exec2_objects,
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
		bufmgr_gem->exec_bos =
			realloc(bufmgr_gem->exec_bos,
				sizeof(*bufmgr_gem->exec_bos) * new_size);
		bufmgr_gem->exec_size = new_size;
	}

	index = bufmgr_gem->exec_count;
	bo_gem->validate_index = index;
	/* Fill in array entry */
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
	bufmgr_gem->exec2_objects[index].alignment = 0;
	bufmgr_gem->exec2_objects[index].offset = 0;
	bufmgr_gem->exec_bos[index] = bo;
	bufmgr_gem->exec2_objects[index].flags = 0;
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
	if (need_fence) {
		bufmgr_gem->exec2_objects[index].flags |=
			EXEC_OBJECT_NEEDS_FENCE;
	}
	bufmgr_gem->exec_count++;
}

#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
	sizeof(uint32_t))

static void
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
				      drm_intel_bo_gem *bo_gem)
{
	int size;

	assert(!bo_gem->used_as_reloc_target);

	/* The older chipsets are far less flexible in terms of tiling,
	 * and require tiled buffers to be size-aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture.  Optimal packing is for wimps.
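	 *
	 * (Illustrative example, not from the original comment: a 1.5 MiB
	 * X-tiled buffer on gen3 with relaxed fencing rounds min_size up from
	 * 1 MiB to 2 MiB and reserves 4 MiB of aperture; without relaxed
	 * fencing the reservation is simply twice the object size, 3 MiB.)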
508 */ 509 size = bo_gem->bo.size; 510 if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) { 511 int min_size; 512 513 if (bufmgr_gem->has_relaxed_fencing) { 514 if (bufmgr_gem->gen == 3) 515 min_size = 1024*1024; 516 else 517 min_size = 512*1024; 518 519 while (min_size < size) 520 min_size *= 2; 521 } else 522 min_size = size; 523 524 /* Account for worst-case alignment. */ 525 size = 2 * min_size; 526 } 527 528 bo_gem->reloc_tree_size = size; 529} 530 531static int 532drm_intel_setup_reloc_list(drm_intel_bo *bo) 533{ 534 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 535 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 536 unsigned int max_relocs = bufmgr_gem->max_relocs; 537 538 if (bo->size / 4 < max_relocs) 539 max_relocs = bo->size / 4; 540 541 bo_gem->relocs = malloc(max_relocs * 542 sizeof(struct drm_i915_gem_relocation_entry)); 543 bo_gem->reloc_target_info = malloc(max_relocs * 544 sizeof(drm_intel_reloc_target)); 545 if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) { 546 bo_gem->has_error = true; 547 548 free (bo_gem->relocs); 549 bo_gem->relocs = NULL; 550 551 free (bo_gem->reloc_target_info); 552 bo_gem->reloc_target_info = NULL; 553 554 return 1; 555 } 556 557 return 0; 558} 559 560static int 561drm_intel_gem_bo_busy(drm_intel_bo *bo) 562{ 563 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 564 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 565 struct drm_i915_gem_busy busy; 566 int ret; 567 568 VG_CLEAR(busy); 569 busy.handle = bo_gem->gem_handle; 570 571 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); 572 573 return (ret == 0 && busy.busy); 574} 575 576static int 577drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem, 578 drm_intel_bo_gem *bo_gem, int state) 579{ 580 struct drm_i915_gem_madvise madv; 581 582 VG_CLEAR(madv); 583 madv.handle = bo_gem->gem_handle; 584 madv.madv = state; 585 madv.retained = 1; 586 drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv); 587 588 return madv.retained; 589} 590 591static int 592drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv) 593{ 594 return drm_intel_gem_bo_madvise_internal 595 ((drm_intel_bufmgr_gem *) bo->bufmgr, 596 (drm_intel_bo_gem *) bo, 597 madv); 598} 599 600/* drop the oldest entries that have been purged by the kernel */ 601static void 602drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem, 603 struct drm_intel_gem_bo_bucket *bucket) 604{ 605 while (!DRMLISTEMPTY(&bucket->head)) { 606 drm_intel_bo_gem *bo_gem; 607 608 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 609 bucket->head.next, head); 610 if (drm_intel_gem_bo_madvise_internal 611 (bufmgr_gem, bo_gem, I915_MADV_DONTNEED)) 612 break; 613 614 DRMLISTDEL(&bo_gem->head); 615 drm_intel_gem_bo_free(&bo_gem->bo); 616 } 617} 618 619static drm_intel_bo * 620drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, 621 const char *name, 622 unsigned long size, 623 unsigned long flags, 624 uint32_t tiling_mode, 625 unsigned long stride) 626{ 627 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 628 drm_intel_bo_gem *bo_gem; 629 unsigned int page_size = getpagesize(); 630 int ret; 631 struct drm_intel_gem_bo_bucket *bucket; 632 bool alloc_from_cache; 633 unsigned long bo_size; 634 bool for_render = false; 635 636 if (flags & BO_ALLOC_FOR_RENDER) 637 for_render = true; 638 639 /* Round the allocated size up to a power of two number of pages. 
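	 *
	 * (In practice the request is rounded up to the size of the smallest
	 * cache bucket that can hold it, per the lookup just below; if no
	 * bucket matches, it is only rounded up to the page size.)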
*/ 640 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size); 641 642 /* If we don't have caching at this size, don't actually round the 643 * allocation up. 644 */ 645 if (bucket == NULL) { 646 bo_size = size; 647 if (bo_size < page_size) 648 bo_size = page_size; 649 } else { 650 bo_size = bucket->size; 651 } 652 653 pthread_mutex_lock(&bufmgr_gem->lock); 654 /* Get a buffer out of the cache if available */ 655retry: 656 alloc_from_cache = false; 657 if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) { 658 if (for_render) { 659 /* Allocate new render-target BOs from the tail (MRU) 660 * of the list, as it will likely be hot in the GPU 661 * cache and in the aperture for us. 662 */ 663 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 664 bucket->head.prev, head); 665 DRMLISTDEL(&bo_gem->head); 666 alloc_from_cache = true; 667 } else { 668 /* For non-render-target BOs (where we're probably 669 * going to map it first thing in order to fill it 670 * with data), check if the last BO in the cache is 671 * unbusy, and only reuse in that case. Otherwise, 672 * allocating a new buffer is probably faster than 673 * waiting for the GPU to finish. 674 */ 675 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 676 bucket->head.next, head); 677 if (!drm_intel_gem_bo_busy(&bo_gem->bo)) { 678 alloc_from_cache = true; 679 DRMLISTDEL(&bo_gem->head); 680 } 681 } 682 683 if (alloc_from_cache) { 684 if (!drm_intel_gem_bo_madvise_internal 685 (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) { 686 drm_intel_gem_bo_free(&bo_gem->bo); 687 drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem, 688 bucket); 689 goto retry; 690 } 691 692 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 693 tiling_mode, 694 stride)) { 695 drm_intel_gem_bo_free(&bo_gem->bo); 696 goto retry; 697 } 698 } 699 } 700 pthread_mutex_unlock(&bufmgr_gem->lock); 701 702 if (!alloc_from_cache) { 703 struct drm_i915_gem_create create; 704 705 bo_gem = calloc(1, sizeof(*bo_gem)); 706 if (!bo_gem) 707 return NULL; 708 709 bo_gem->bo.size = bo_size; 710 711 VG_CLEAR(create); 712 create.size = bo_size; 713 714 ret = drmIoctl(bufmgr_gem->fd, 715 DRM_IOCTL_I915_GEM_CREATE, 716 &create); 717 bo_gem->gem_handle = create.handle; 718 bo_gem->bo.handle = bo_gem->gem_handle; 719 if (ret != 0) { 720 free(bo_gem); 721 return NULL; 722 } 723 bo_gem->bo.bufmgr = bufmgr; 724 725 bo_gem->tiling_mode = I915_TILING_NONE; 726 bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; 727 bo_gem->stride = 0; 728 729 if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo, 730 tiling_mode, 731 stride)) { 732 drm_intel_gem_bo_free(&bo_gem->bo); 733 return NULL; 734 } 735 736 DRMINITLISTHEAD(&bo_gem->name_list); 737 DRMINITLISTHEAD(&bo_gem->vma_list); 738 } 739 740 bo_gem->name = name; 741 atomic_set(&bo_gem->refcount, 1); 742 bo_gem->validate_index = -1; 743 bo_gem->reloc_tree_fences = 0; 744 bo_gem->used_as_reloc_target = false; 745 bo_gem->has_error = false; 746 bo_gem->reusable = true; 747 bo_gem->aub_annotations = NULL; 748 bo_gem->aub_annotation_count = 0; 749 750 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 751 752 DBG("bo_create: buf %d (%s) %ldb\n", 753 bo_gem->gem_handle, bo_gem->name, size); 754 755 return &bo_gem->bo; 756} 757 758static drm_intel_bo * 759drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, 760 const char *name, 761 unsigned long size, 762 unsigned int alignment) 763{ 764 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 765 BO_ALLOC_FOR_RENDER, 766 I915_TILING_NONE, 0); 767} 768 769static drm_intel_bo * 770drm_intel_gem_bo_alloc(drm_intel_bufmgr 
*bufmgr, 771 const char *name, 772 unsigned long size, 773 unsigned int alignment) 774{ 775 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0, 776 I915_TILING_NONE, 0); 777} 778 779static drm_intel_bo * 780drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name, 781 int x, int y, int cpp, uint32_t *tiling_mode, 782 unsigned long *pitch, unsigned long flags) 783{ 784 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 785 unsigned long size, stride; 786 uint32_t tiling; 787 788 do { 789 unsigned long aligned_y, height_alignment; 790 791 tiling = *tiling_mode; 792 793 /* If we're tiled, our allocations are in 8 or 32-row blocks, 794 * so failure to align our height means that we won't allocate 795 * enough pages. 796 * 797 * If we're untiled, we still have to align to 2 rows high 798 * because the data port accesses 2x2 blocks even if the 799 * bottom row isn't to be rendered, so failure to align means 800 * we could walk off the end of the GTT and fault. This is 801 * documented on 965, and may be the case on older chipsets 802 * too so we try to be careful. 803 */ 804 aligned_y = y; 805 height_alignment = 2; 806 807 if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE) 808 height_alignment = 16; 809 else if (tiling == I915_TILING_X 810 || (IS_915(bufmgr_gem->pci_device) 811 && tiling == I915_TILING_Y)) 812 height_alignment = 8; 813 else if (tiling == I915_TILING_Y) 814 height_alignment = 32; 815 aligned_y = ALIGN(y, height_alignment); 816 817 stride = x * cpp; 818 stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode); 819 size = stride * aligned_y; 820 size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode); 821 } while (*tiling_mode != tiling); 822 *pitch = stride; 823 824 if (tiling == I915_TILING_NONE) 825 stride = 0; 826 827 return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags, 828 tiling, stride); 829} 830 831/** 832 * Returns a drm_intel_bo wrapping the given buffer object handle. 833 * 834 * This can be used when one application needs to pass a buffer object 835 * to another. 836 */ 837drm_intel_bo * 838drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr, 839 const char *name, 840 unsigned int handle) 841{ 842 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 843 drm_intel_bo_gem *bo_gem; 844 int ret; 845 struct drm_gem_open open_arg; 846 struct drm_i915_gem_get_tiling get_tiling; 847 drmMMListHead *list; 848 849 /* At the moment most applications only have a few named bo. 850 * For instance, in a DRI client only the render buffers passed 851 * between X and the client are named. And since X returns the 852 * alternating names for the front/back buffer a linear search 853 * provides a sufficiently fast match. 
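	 *
	 * (Illustrative usage, not part of the original file: the exporting
	 * process typically publishes the global name with
	 * drm_intel_bo_flink() and the importer then calls
	 * drm_intel_bo_gem_create_from_name(bufmgr, "shared", name).)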
854 */ 855 for (list = bufmgr_gem->named.next; 856 list != &bufmgr_gem->named; 857 list = list->next) { 858 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list); 859 if (bo_gem->global_name == handle) { 860 drm_intel_gem_bo_reference(&bo_gem->bo); 861 return &bo_gem->bo; 862 } 863 } 864 865 bo_gem = calloc(1, sizeof(*bo_gem)); 866 if (!bo_gem) 867 return NULL; 868 869 VG_CLEAR(open_arg); 870 open_arg.name = handle; 871 ret = drmIoctl(bufmgr_gem->fd, 872 DRM_IOCTL_GEM_OPEN, 873 &open_arg); 874 if (ret != 0) { 875 DBG("Couldn't reference %s handle 0x%08x: %s\n", 876 name, handle, strerror(errno)); 877 free(bo_gem); 878 return NULL; 879 } 880 bo_gem->bo.size = open_arg.size; 881 bo_gem->bo.offset = 0; 882 bo_gem->bo.virtual = NULL; 883 bo_gem->bo.bufmgr = bufmgr; 884 bo_gem->name = name; 885 atomic_set(&bo_gem->refcount, 1); 886 bo_gem->validate_index = -1; 887 bo_gem->gem_handle = open_arg.handle; 888 bo_gem->bo.handle = open_arg.handle; 889 bo_gem->global_name = handle; 890 bo_gem->reusable = false; 891 892 VG_CLEAR(get_tiling); 893 get_tiling.handle = bo_gem->gem_handle; 894 ret = drmIoctl(bufmgr_gem->fd, 895 DRM_IOCTL_I915_GEM_GET_TILING, 896 &get_tiling); 897 if (ret != 0) { 898 drm_intel_gem_bo_unreference(&bo_gem->bo); 899 return NULL; 900 } 901 bo_gem->tiling_mode = get_tiling.tiling_mode; 902 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 903 /* XXX stride is unknown */ 904 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 905 906 DRMINITLISTHEAD(&bo_gem->vma_list); 907 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 908 DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); 909 910 return &bo_gem->bo; 911} 912 913static void 914drm_intel_gem_bo_free(drm_intel_bo *bo) 915{ 916 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 917 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 918 struct drm_gem_close close; 919 int ret; 920 921 DRMLISTDEL(&bo_gem->vma_list); 922 if (bo_gem->mem_virtual) { 923 VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0)); 924 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 925 bufmgr_gem->vma_count--; 926 } 927 if (bo_gem->gtt_virtual) { 928 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 929 bufmgr_gem->vma_count--; 930 } 931 932 /* Close this object */ 933 VG_CLEAR(close); 934 close.handle = bo_gem->gem_handle; 935 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); 936 if (ret != 0) { 937 DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", 938 bo_gem->gem_handle, bo_gem->name, strerror(errno)); 939 } 940 free(bo_gem->aub_annotations); 941 free(bo); 942} 943 944static void 945drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo) 946{ 947#if HAVE_VALGRIND 948 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 949 950 if (bo_gem->mem_virtual) 951 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size); 952 953 if (bo_gem->gtt_virtual) 954 VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size); 955#endif 956} 957 958/** Frees all cached buffers significantly older than @time. 
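 *
 * (Buffers freed within the last second are kept; because each bucket list
 * is ordered oldest-first, the scan stops at the first entry that is still
 * too young.)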
*/ 959static void 960drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time) 961{ 962 int i; 963 964 if (bufmgr_gem->time == time) 965 return; 966 967 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 968 struct drm_intel_gem_bo_bucket *bucket = 969 &bufmgr_gem->cache_bucket[i]; 970 971 while (!DRMLISTEMPTY(&bucket->head)) { 972 drm_intel_bo_gem *bo_gem; 973 974 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 975 bucket->head.next, head); 976 if (time - bo_gem->free_time <= 1) 977 break; 978 979 DRMLISTDEL(&bo_gem->head); 980 981 drm_intel_gem_bo_free(&bo_gem->bo); 982 } 983 } 984 985 bufmgr_gem->time = time; 986} 987 988static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem) 989{ 990 int limit; 991 992 DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__, 993 bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max); 994 995 if (bufmgr_gem->vma_max < 0) 996 return; 997 998 /* We may need to evict a few entries in order to create new mmaps */ 999 limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open; 1000 if (limit < 0) 1001 limit = 0; 1002 1003 while (bufmgr_gem->vma_count > limit) { 1004 drm_intel_bo_gem *bo_gem; 1005 1006 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1007 bufmgr_gem->vma_cache.next, 1008 vma_list); 1009 assert(bo_gem->map_count == 0); 1010 DRMLISTDELINIT(&bo_gem->vma_list); 1011 1012 if (bo_gem->mem_virtual) { 1013 munmap(bo_gem->mem_virtual, bo_gem->bo.size); 1014 bo_gem->mem_virtual = NULL; 1015 bufmgr_gem->vma_count--; 1016 } 1017 if (bo_gem->gtt_virtual) { 1018 munmap(bo_gem->gtt_virtual, bo_gem->bo.size); 1019 bo_gem->gtt_virtual = NULL; 1020 bufmgr_gem->vma_count--; 1021 } 1022 } 1023} 1024 1025static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1026 drm_intel_bo_gem *bo_gem) 1027{ 1028 bufmgr_gem->vma_open--; 1029 DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache); 1030 if (bo_gem->mem_virtual) 1031 bufmgr_gem->vma_count++; 1032 if (bo_gem->gtt_virtual) 1033 bufmgr_gem->vma_count++; 1034 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1035} 1036 1037static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem, 1038 drm_intel_bo_gem *bo_gem) 1039{ 1040 bufmgr_gem->vma_open++; 1041 DRMLISTDEL(&bo_gem->vma_list); 1042 if (bo_gem->mem_virtual) 1043 bufmgr_gem->vma_count--; 1044 if (bo_gem->gtt_virtual) 1045 bufmgr_gem->vma_count--; 1046 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 1047} 1048 1049static void 1050drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time) 1051{ 1052 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1053 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1054 struct drm_intel_gem_bo_bucket *bucket; 1055 int i; 1056 1057 /* Unreference all the target buffers */ 1058 for (i = 0; i < bo_gem->reloc_count; i++) { 1059 if (bo_gem->reloc_target_info[i].bo != bo) { 1060 drm_intel_gem_bo_unreference_locked_timed(bo_gem-> 1061 reloc_target_info[i].bo, 1062 time); 1063 } 1064 } 1065 bo_gem->reloc_count = 0; 1066 bo_gem->used_as_reloc_target = false; 1067 1068 DBG("bo_unreference final: %d (%s)\n", 1069 bo_gem->gem_handle, bo_gem->name); 1070 1071 /* release memory associated with this object */ 1072 if (bo_gem->reloc_target_info) { 1073 free(bo_gem->reloc_target_info); 1074 bo_gem->reloc_target_info = NULL; 1075 } 1076 if (bo_gem->relocs) { 1077 free(bo_gem->relocs); 1078 bo_gem->relocs = NULL; 1079 } 1080 1081 /* Clear any left-over mappings */ 1082 if (bo_gem->map_count) { 1083 DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count); 
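		/* Reset the stale count and hand any cached CPU/GTT mappings
		 * back to the VMA cache so they can be reclaimed.
		 */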
1084 bo_gem->map_count = 0; 1085 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1086 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1087 } 1088 1089 DRMLISTDEL(&bo_gem->name_list); 1090 1091 bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size); 1092 /* Put the buffer into our internal cache for reuse if we can. */ 1093 if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL && 1094 drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem, 1095 I915_MADV_DONTNEED)) { 1096 bo_gem->free_time = time; 1097 1098 bo_gem->name = NULL; 1099 bo_gem->validate_index = -1; 1100 1101 DRMLISTADDTAIL(&bo_gem->head, &bucket->head); 1102 } else { 1103 drm_intel_gem_bo_free(bo); 1104 } 1105} 1106 1107static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo, 1108 time_t time) 1109{ 1110 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1111 1112 assert(atomic_read(&bo_gem->refcount) > 0); 1113 if (atomic_dec_and_test(&bo_gem->refcount)) 1114 drm_intel_gem_bo_unreference_final(bo, time); 1115} 1116 1117static void drm_intel_gem_bo_unreference(drm_intel_bo *bo) 1118{ 1119 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1120 1121 assert(atomic_read(&bo_gem->refcount) > 0); 1122 if (atomic_dec_and_test(&bo_gem->refcount)) { 1123 drm_intel_bufmgr_gem *bufmgr_gem = 1124 (drm_intel_bufmgr_gem *) bo->bufmgr; 1125 struct timespec time; 1126 1127 clock_gettime(CLOCK_MONOTONIC, &time); 1128 1129 pthread_mutex_lock(&bufmgr_gem->lock); 1130 drm_intel_gem_bo_unreference_final(bo, time.tv_sec); 1131 drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec); 1132 pthread_mutex_unlock(&bufmgr_gem->lock); 1133 } 1134} 1135 1136static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable) 1137{ 1138 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1139 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1140 struct drm_i915_gem_set_domain set_domain; 1141 int ret; 1142 1143 pthread_mutex_lock(&bufmgr_gem->lock); 1144 1145 if (bo_gem->map_count++ == 0) 1146 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1147 1148 if (!bo_gem->mem_virtual) { 1149 struct drm_i915_gem_mmap mmap_arg; 1150 1151 DBG("bo_map: %d (%s), map_count=%d\n", 1152 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1153 1154 VG_CLEAR(mmap_arg); 1155 mmap_arg.handle = bo_gem->gem_handle; 1156 mmap_arg.offset = 0; 1157 mmap_arg.size = bo->size; 1158 ret = drmIoctl(bufmgr_gem->fd, 1159 DRM_IOCTL_I915_GEM_MMAP, 1160 &mmap_arg); 1161 if (ret != 0) { 1162 ret = -errno; 1163 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1164 __FILE__, __LINE__, bo_gem->gem_handle, 1165 bo_gem->name, strerror(errno)); 1166 if (--bo_gem->map_count == 0) 1167 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1168 pthread_mutex_unlock(&bufmgr_gem->lock); 1169 return ret; 1170 } 1171 VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1)); 1172 bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr; 1173 } 1174 DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1175 bo_gem->mem_virtual); 1176 bo->virtual = bo_gem->mem_virtual; 1177 1178 VG_CLEAR(set_domain); 1179 set_domain.handle = bo_gem->gem_handle; 1180 set_domain.read_domains = I915_GEM_DOMAIN_CPU; 1181 if (write_enable) 1182 set_domain.write_domain = I915_GEM_DOMAIN_CPU; 1183 else 1184 set_domain.write_domain = 0; 1185 ret = drmIoctl(bufmgr_gem->fd, 1186 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1187 &set_domain); 1188 if (ret != 0) { 1189 DBG("%s:%d: Error setting to CPU domain %d: %s\n", 1190 __FILE__, __LINE__, bo_gem->gem_handle, 1191 
strerror(errno)); 1192 } 1193 1194 if (write_enable) 1195 bo_gem->mapped_cpu_write = true; 1196 1197 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1198 VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size)); 1199 pthread_mutex_unlock(&bufmgr_gem->lock); 1200 1201 return 0; 1202} 1203 1204static int 1205map_gtt(drm_intel_bo *bo) 1206{ 1207 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1208 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1209 int ret; 1210 1211 if (bo_gem->map_count++ == 0) 1212 drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem); 1213 1214 /* Get a mapping of the buffer if we haven't before. */ 1215 if (bo_gem->gtt_virtual == NULL) { 1216 struct drm_i915_gem_mmap_gtt mmap_arg; 1217 1218 DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n", 1219 bo_gem->gem_handle, bo_gem->name, bo_gem->map_count); 1220 1221 VG_CLEAR(mmap_arg); 1222 mmap_arg.handle = bo_gem->gem_handle; 1223 1224 /* Get the fake offset back... */ 1225 ret = drmIoctl(bufmgr_gem->fd, 1226 DRM_IOCTL_I915_GEM_MMAP_GTT, 1227 &mmap_arg); 1228 if (ret != 0) { 1229 ret = -errno; 1230 DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n", 1231 __FILE__, __LINE__, 1232 bo_gem->gem_handle, bo_gem->name, 1233 strerror(errno)); 1234 if (--bo_gem->map_count == 0) 1235 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1236 return ret; 1237 } 1238 1239 /* and mmap it */ 1240 bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE, 1241 MAP_SHARED, bufmgr_gem->fd, 1242 mmap_arg.offset); 1243 if (bo_gem->gtt_virtual == MAP_FAILED) { 1244 bo_gem->gtt_virtual = NULL; 1245 ret = -errno; 1246 DBG("%s:%d: Error mapping buffer %d (%s): %s .\n", 1247 __FILE__, __LINE__, 1248 bo_gem->gem_handle, bo_gem->name, 1249 strerror(errno)); 1250 if (--bo_gem->map_count == 0) 1251 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1252 return ret; 1253 } 1254 } 1255 1256 bo->virtual = bo_gem->gtt_virtual; 1257 1258 DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, 1259 bo_gem->gtt_virtual); 1260 1261 return 0; 1262} 1263 1264int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo) 1265{ 1266 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1267 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1268 struct drm_i915_gem_set_domain set_domain; 1269 int ret; 1270 1271 pthread_mutex_lock(&bufmgr_gem->lock); 1272 1273 ret = map_gtt(bo); 1274 if (ret) { 1275 pthread_mutex_unlock(&bufmgr_gem->lock); 1276 return ret; 1277 } 1278 1279 /* Now move it to the GTT domain so that the GPU and CPU 1280 * caches are flushed and the GPU isn't actively using the 1281 * buffer. 1282 * 1283 * The pagefault handler does this domain change for us when 1284 * it has unbound the BO from the GTT, but it's up to us to 1285 * tell it when we're about to use things if we had done 1286 * rendering and it still happens to be bound to the GTT. 
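	 *
	 * (Illustrative caller-side sequence, not from the original file:
	 *     drm_intel_gem_bo_map_gtt(bo);
	 *     memcpy(bo->virtual, data, len);
	 *     drm_intel_gem_bo_unmap_gtt(bo);
	 * where "data" and "len" stand in for the caller's upload.)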
 */
	VG_CLEAR(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
	ret = drmIoctl(bufmgr_gem->fd,
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
		       &set_domain);
	if (ret != 0) {
		DBG("%s:%d: Error setting domain %d: %s\n",
		    __FILE__, __LINE__, bo_gem->gem_handle,
		    strerror(errno));
	}

	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	pthread_mutex_unlock(&bufmgr_gem->lock);

	return 0;
}

/**
 * Performs a mapping of the buffer object like the normal GTT
 * mapping, but avoids waiting for the GPU to be done reading from or
 * rendering to the buffer.
 *
 * This is used in the implementation of GL_ARB_map_buffer_range: The
 * user asks to create a buffer, then does a mapping, fills some
 * space, runs a drawing command, then asks to map it again without
 * synchronizing because it guarantees that it won't write over the
 * data that the GPU is busy using (or, more specifically, that if it
 * does write over the data, it acknowledges that rendering is
 * undefined).
 */

int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret;

	/* If the CPU cache isn't coherent with the GTT, then use a
	 * regular synchronized mapping.  The problem is that we don't
	 * track where the buffer was last used on the CPU side in
	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
	 * we would potentially corrupt the buffer even when the user
	 * does reasonable things.
	 */
	if (!bufmgr_gem->has_llc)
		return drm_intel_gem_bo_map_gtt(bo);

	pthread_mutex_lock(&bufmgr_gem->lock);

	ret = map_gtt(bo);
	if (ret == 0) {
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
	}

	pthread_mutex_unlock(&bufmgr_gem->lock);

	return ret;
}

static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	int ret = 0;

	if (bo == NULL)
		return 0;

	pthread_mutex_lock(&bufmgr_gem->lock);

	if (bo_gem->map_count <= 0) {
		DBG("attempted to unmap an unmapped bo\n");
		pthread_mutex_unlock(&bufmgr_gem->lock);
		/* Preserve the old behaviour of just treating this as a
		 * no-op rather than reporting the error.
		 */
		return 0;
	}

	if (bo_gem->mapped_cpu_write) {
		struct drm_i915_gem_sw_finish sw_finish;

		/* Cause a flush to happen if the buffer's pinned for
		 * scanout, so the results show up in a timely manner.
		 * Unlike GTT set domains, this only does work if the
		 * buffer should be scanout-related.
		 */
		VG_CLEAR(sw_finish);
		sw_finish.handle = bo_gem->gem_handle;
		ret = drmIoctl(bufmgr_gem->fd,
			       DRM_IOCTL_I915_GEM_SW_FINISH,
			       &sw_finish);
		ret = ret == -1 ? -errno : 0;

		bo_gem->mapped_cpu_write = false;
	}

	/* We need to unmap after every invocation as we cannot track
	 * an open vma for every bo as that will exhaust the system
	 * limits and cause later failures.
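	 *
	 * (The cap itself is enforced by drm_intel_gem_bo_purge_vma_cache()
	 * above; a negative vma_max leaves the mapping cache unbounded.)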
1392 */ 1393 if (--bo_gem->map_count == 0) { 1394 drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem); 1395 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1396 bo->virtual = NULL; 1397 } 1398 pthread_mutex_unlock(&bufmgr_gem->lock); 1399 1400 return ret; 1401} 1402 1403int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo) 1404{ 1405 return drm_intel_gem_bo_unmap(bo); 1406} 1407 1408static int 1409drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset, 1410 unsigned long size, const void *data) 1411{ 1412 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1413 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1414 struct drm_i915_gem_pwrite pwrite; 1415 int ret; 1416 1417 VG_CLEAR(pwrite); 1418 pwrite.handle = bo_gem->gem_handle; 1419 pwrite.offset = offset; 1420 pwrite.size = size; 1421 pwrite.data_ptr = (uint64_t) (uintptr_t) data; 1422 ret = drmIoctl(bufmgr_gem->fd, 1423 DRM_IOCTL_I915_GEM_PWRITE, 1424 &pwrite); 1425 if (ret != 0) { 1426 ret = -errno; 1427 DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", 1428 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1429 (int)size, strerror(errno)); 1430 } 1431 1432 return ret; 1433} 1434 1435static int 1436drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id) 1437{ 1438 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1439 struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id; 1440 int ret; 1441 1442 VG_CLEAR(get_pipe_from_crtc_id); 1443 get_pipe_from_crtc_id.crtc_id = crtc_id; 1444 ret = drmIoctl(bufmgr_gem->fd, 1445 DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, 1446 &get_pipe_from_crtc_id); 1447 if (ret != 0) { 1448 /* We return -1 here to signal that we don't 1449 * know which pipe is associated with this crtc. 1450 * This lets the caller know that this information 1451 * isn't available; using the wrong pipe for 1452 * vblank waiting can cause the chipset to lock up 1453 */ 1454 return -1; 1455 } 1456 1457 return get_pipe_from_crtc_id.pipe; 1458} 1459 1460static int 1461drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset, 1462 unsigned long size, void *data) 1463{ 1464 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1465 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1466 struct drm_i915_gem_pread pread; 1467 int ret; 1468 1469 VG_CLEAR(pread); 1470 pread.handle = bo_gem->gem_handle; 1471 pread.offset = offset; 1472 pread.size = size; 1473 pread.data_ptr = (uint64_t) (uintptr_t) data; 1474 ret = drmIoctl(bufmgr_gem->fd, 1475 DRM_IOCTL_I915_GEM_PREAD, 1476 &pread); 1477 if (ret != 0) { 1478 ret = -errno; 1479 DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", 1480 __FILE__, __LINE__, bo_gem->gem_handle, (int)offset, 1481 (int)size, strerror(errno)); 1482 } 1483 1484 return ret; 1485} 1486 1487/** Waits for all GPU rendering with the object to have completed. */ 1488static void 1489drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo) 1490{ 1491 drm_intel_gem_bo_start_gtt_access(bo, 1); 1492} 1493 1494/** 1495 * Waits on a BO for the given amount of time. 1496 * 1497 * @bo: buffer object to wait for 1498 * @timeout_ns: amount of time to wait in nanoseconds. 1499 * If value is less than 0, an infinite wait will occur. 1500 * 1501 * Returns 0 if the wait was successful ie. the last batch referencing the 1502 * object has completed within the allotted time. Otherwise some negative return 1503 * value describes the error. Of particular interest is -ETIME when the wait has 1504 * failed to yield the desired result. 
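 *
 * (Illustrative call, not from the original file:
 *     ret = drm_intel_gem_bo_wait(bo, 1000000000ll);
 * waits for up to one second; a return of -ETIME means the BO was still
 * busy when the timeout expired.)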
 *
 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
 * the operation to give up after a certain amount of time.  Another subtle
 * difference is the internal locking semantics are different (this variant does
 * not hold the lock for the duration of the wait).  This makes the wait subject
 * to a larger userspace race window.
 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call.  The wait does
 * not guard against the buffer being re-issued via another thread, or a
 * flinked handle.  Userspace must make sure this race does not occur if such
 * precision is important.
 */
int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_wait wait;
	int ret;

	if (!bufmgr_gem->has_wait_timeout) {
		DBG("%s:%d: Timed wait is not supported. Falling back to "
		    "infinite wait\n", __FILE__, __LINE__);
		if (timeout_ns) {
			drm_intel_gem_bo_wait_rendering(bo);
			return 0;
		} else {
			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
		}
	}

	wait.bo_handle = bo_gem->gem_handle;
	wait.timeout_ns = timeout_ns;
	wait.flags = 0;
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
	if (ret == -1)
		return -errno;

	return ret;
}

/**
 * Sets the object to the GTT read and possibly write domain, used by the X
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
 *
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
 * can do tiled pixmaps this way.
 */
void
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
{
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
	struct drm_i915_gem_set_domain set_domain;
	int ret;

	VG_CLEAR(set_domain);
	set_domain.handle = bo_gem->gem_handle;
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
	set_domain.write_domain = write_enable ?
I915_GEM_DOMAIN_GTT : 0; 1565 ret = drmIoctl(bufmgr_gem->fd, 1566 DRM_IOCTL_I915_GEM_SET_DOMAIN, 1567 &set_domain); 1568 if (ret != 0) { 1569 DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", 1570 __FILE__, __LINE__, bo_gem->gem_handle, 1571 set_domain.read_domains, set_domain.write_domain, 1572 strerror(errno)); 1573 } 1574} 1575 1576static void 1577drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr) 1578{ 1579 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 1580 int i; 1581 1582 free(bufmgr_gem->exec2_objects); 1583 free(bufmgr_gem->exec_objects); 1584 free(bufmgr_gem->exec_bos); 1585 free(bufmgr_gem->aub_filename); 1586 1587 pthread_mutex_destroy(&bufmgr_gem->lock); 1588 1589 /* Free any cached buffer objects we were going to reuse */ 1590 for (i = 0; i < bufmgr_gem->num_buckets; i++) { 1591 struct drm_intel_gem_bo_bucket *bucket = 1592 &bufmgr_gem->cache_bucket[i]; 1593 drm_intel_bo_gem *bo_gem; 1594 1595 while (!DRMLISTEMPTY(&bucket->head)) { 1596 bo_gem = DRMLISTENTRY(drm_intel_bo_gem, 1597 bucket->head.next, head); 1598 DRMLISTDEL(&bo_gem->head); 1599 1600 drm_intel_gem_bo_free(&bo_gem->bo); 1601 } 1602 } 1603 1604 free(bufmgr); 1605} 1606 1607/** 1608 * Adds the target buffer to the validation list and adds the relocation 1609 * to the reloc_buffer's relocation list. 1610 * 1611 * The relocation entry at the given offset must already contain the 1612 * precomputed relocation value, because the kernel will optimize out 1613 * the relocation entry write when the buffer hasn't moved from the 1614 * last known offset in target_bo. 1615 */ 1616static int 1617do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1618 drm_intel_bo *target_bo, uint32_t target_offset, 1619 uint32_t read_domains, uint32_t write_domain, 1620 bool need_fence) 1621{ 1622 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1623 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1624 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 1625 bool fenced_command; 1626 1627 if (bo_gem->has_error) 1628 return -ENOMEM; 1629 1630 if (target_bo_gem->has_error) { 1631 bo_gem->has_error = true; 1632 return -ENOMEM; 1633 } 1634 1635 /* We never use HW fences for rendering on 965+ */ 1636 if (bufmgr_gem->gen >= 4) 1637 need_fence = false; 1638 1639 fenced_command = need_fence; 1640 if (target_bo_gem->tiling_mode == I915_TILING_NONE) 1641 need_fence = false; 1642 1643 /* Create a new relocation list if needed */ 1644 if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo)) 1645 return -ENOMEM; 1646 1647 /* Check overflow */ 1648 assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); 1649 1650 /* Check args */ 1651 assert(offset <= bo->size - 4); 1652 assert((write_domain & (write_domain - 1)) == 0); 1653 1654 /* Make sure that we're not adding a reloc to something whose size has 1655 * already been accounted for. 1656 */ 1657 assert(!bo_gem->used_as_reloc_target); 1658 if (target_bo_gem != bo_gem) { 1659 target_bo_gem->used_as_reloc_target = true; 1660 bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size; 1661 } 1662 /* An object needing a fence is a tiled buffer, so it won't have 1663 * relocs to other buffers. 
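	 * (This is also why reloc_tree_fences is simply set to 1 on the
	 * target below rather than accumulated recursively.)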
1664 */ 1665 if (need_fence) 1666 target_bo_gem->reloc_tree_fences = 1; 1667 bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences; 1668 1669 bo_gem->relocs[bo_gem->reloc_count].offset = offset; 1670 bo_gem->relocs[bo_gem->reloc_count].delta = target_offset; 1671 bo_gem->relocs[bo_gem->reloc_count].target_handle = 1672 target_bo_gem->gem_handle; 1673 bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; 1674 bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; 1675 bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; 1676 1677 bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo; 1678 if (target_bo != bo) 1679 drm_intel_gem_bo_reference(target_bo); 1680 if (fenced_command) 1681 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 1682 DRM_INTEL_RELOC_FENCE; 1683 else 1684 bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0; 1685 1686 bo_gem->reloc_count++; 1687 1688 return 0; 1689} 1690 1691static int 1692drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset, 1693 drm_intel_bo *target_bo, uint32_t target_offset, 1694 uint32_t read_domains, uint32_t write_domain) 1695{ 1696 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 1697 1698 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1699 read_domains, write_domain, 1700 !bufmgr_gem->fenced_relocs); 1701} 1702 1703static int 1704drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset, 1705 drm_intel_bo *target_bo, 1706 uint32_t target_offset, 1707 uint32_t read_domains, uint32_t write_domain) 1708{ 1709 return do_bo_emit_reloc(bo, offset, target_bo, target_offset, 1710 read_domains, write_domain, true); 1711} 1712 1713int 1714drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo) 1715{ 1716 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1717 1718 return bo_gem->reloc_count; 1719} 1720 1721/** 1722 * Removes existing relocation entries in the BO after "start". 1723 * 1724 * This allows a user to avoid a two-step process for state setup with 1725 * counting up all the buffer objects and doing a 1726 * drm_intel_bufmgr_check_aperture_space() before emitting any of the 1727 * relocations for the state setup. Instead, save the state of the 1728 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the 1729 * state, and then check if it still fits in the aperture. 1730 * 1731 * Any further drm_intel_bufmgr_check_aperture_space() queries 1732 * involving this buffer in the tree are undefined after this call. 1733 */ 1734void 1735drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start) 1736{ 1737 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1738 int i; 1739 struct timespec time; 1740 1741 clock_gettime(CLOCK_MONOTONIC, &time); 1742 1743 assert(bo_gem->reloc_count >= start); 1744 /* Unreference the cleared target buffers */ 1745 for (i = start; i < bo_gem->reloc_count; i++) { 1746 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo; 1747 if (&target_bo_gem->bo != bo) { 1748 bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences; 1749 drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo, 1750 time.tv_sec); 1751 } 1752 } 1753 bo_gem->reloc_count = start; 1754} 1755 1756/** 1757 * Walk the tree of relocations rooted at BO and accumulate the list of 1758 * validations to be performed and update the relocation buffers with 1759 * index values into the validation list. 
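 *
 * (The walk below is depth-first; a buffer that relocates to itself is
 * skipped rather than recursed into.)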
1760 */ 1761static void 1762drm_intel_gem_bo_process_reloc(drm_intel_bo *bo) 1763{ 1764 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1765 int i; 1766 1767 if (bo_gem->relocs == NULL) 1768 return; 1769 1770 for (i = 0; i < bo_gem->reloc_count; i++) { 1771 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1772 1773 if (target_bo == bo) 1774 continue; 1775 1776 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1777 1778 /* Continue walking the tree depth-first. */ 1779 drm_intel_gem_bo_process_reloc(target_bo); 1780 1781 /* Add the target to the validate list */ 1782 drm_intel_add_validate_buffer(target_bo); 1783 } 1784} 1785 1786static void 1787drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo) 1788{ 1789 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1790 int i; 1791 1792 if (bo_gem->relocs == NULL) 1793 return; 1794 1795 for (i = 0; i < bo_gem->reloc_count; i++) { 1796 drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo; 1797 int need_fence; 1798 1799 if (target_bo == bo) 1800 continue; 1801 1802 drm_intel_gem_bo_mark_mmaps_incoherent(bo); 1803 1804 /* Continue walking the tree depth-first. */ 1805 drm_intel_gem_bo_process_reloc2(target_bo); 1806 1807 need_fence = (bo_gem->reloc_target_info[i].flags & 1808 DRM_INTEL_RELOC_FENCE); 1809 1810 /* Add the target to the validate list */ 1811 drm_intel_add_validate_buffer2(target_bo, need_fence); 1812 } 1813} 1814 1815 1816static void 1817drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem) 1818{ 1819 int i; 1820 1821 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1822 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1823 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1824 1825 /* Update the buffer offset */ 1826 if (bufmgr_gem->exec_objects[i].offset != bo->offset) { 1827 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1828 bo_gem->gem_handle, bo_gem->name, bo->offset, 1829 (unsigned long long)bufmgr_gem->exec_objects[i]. 1830 offset); 1831 bo->offset = bufmgr_gem->exec_objects[i].offset; 1832 } 1833 } 1834} 1835 1836static void 1837drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem) 1838{ 1839 int i; 1840 1841 for (i = 0; i < bufmgr_gem->exec_count; i++) { 1842 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 1843 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 1844 1845 /* Update the buffer offset */ 1846 if (bufmgr_gem->exec2_objects[i].offset != bo->offset) { 1847 DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", 1848 bo_gem->gem_handle, bo_gem->name, bo->offset, 1849 (unsigned long long)bufmgr_gem->exec2_objects[i].offset); 1850 bo->offset = bufmgr_gem->exec2_objects[i].offset; 1851 } 1852 } 1853} 1854 1855static void 1856aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data) 1857{ 1858 fwrite(&data, 1, 4, bufmgr_gem->aub_file); 1859} 1860 1861static void 1862aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size) 1863{ 1864 fwrite(data, 1, size, bufmgr_gem->aub_file); 1865} 1866 1867static void 1868aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size) 1869{ 1870 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1871 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1872 uint32_t *data; 1873 unsigned int i; 1874 1875 data = malloc(bo->size); 1876 drm_intel_bo_get_subdata(bo, offset, size, data); 1877 1878 /* Easy mode: write out bo with no relocations */ 1879 if (!bo_gem->reloc_count) { 1880 aub_out_data(bufmgr_gem, data, size); 1881 free(data); 1882 return; 1883 } 1884 1885 /* Otherwise, handle the relocations while writing. 
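	 * (Each dword whose offset matches a relocation entry is emitted as
	 * target->aub_offset + delta, so the AUB stream sees the addresses
	 * the relocations would have produced.)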
*/ 1886 for (i = 0; i < size / 4; i++) { 1887 int r; 1888 for (r = 0; r < bo_gem->reloc_count; r++) { 1889 struct drm_i915_gem_relocation_entry *reloc; 1890 drm_intel_reloc_target *info; 1891 1892 reloc = &bo_gem->relocs[r]; 1893 info = &bo_gem->reloc_target_info[r]; 1894 1895 if (reloc->offset == offset + i * 4) { 1896 drm_intel_bo_gem *target_gem; 1897 uint32_t val; 1898 1899 target_gem = (drm_intel_bo_gem *)info->bo; 1900 1901 val = reloc->delta; 1902 val += target_gem->aub_offset; 1903 1904 aub_out(bufmgr_gem, val); 1905 data[i] = val; 1906 break; 1907 } 1908 } 1909 if (r == bo_gem->reloc_count) { 1910 /* no relocation, just the data */ 1911 aub_out(bufmgr_gem, data[i]); 1912 } 1913 } 1914 1915 free(data); 1916} 1917 1918static void 1919aub_bo_get_address(drm_intel_bo *bo) 1920{ 1921 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1922 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1923 1924 /* Give the object a graphics address in the AUB file. We 1925 * don't just use the GEM object address because we do AUB 1926 * dumping before execution -- we want to successfully log 1927 * when the hardware might hang, and we might even want to aub 1928 * capture for a driver trying to execute on a different 1929 * generation of hardware by disabling the actual kernel exec 1930 * call. 1931 */ 1932 bo_gem->aub_offset = bufmgr_gem->aub_offset; 1933 bufmgr_gem->aub_offset += bo->size; 1934 /* XXX: Handle aperture overflow. */ 1935 assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024); 1936} 1937 1938static void 1939aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 1940 uint32_t offset, uint32_t size) 1941{ 1942 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 1943 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1944 1945 aub_out(bufmgr_gem, 1946 CMD_AUB_TRACE_HEADER_BLOCK | 1947 ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 1948 aub_out(bufmgr_gem, 1949 AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE); 1950 aub_out(bufmgr_gem, subtype); 1951 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 1952 aub_out(bufmgr_gem, size); 1953 if (bufmgr_gem->gen >= 8) 1954 aub_out(bufmgr_gem, 0); 1955 aub_write_bo_data(bo, offset, size); 1956} 1957 1958/** 1959 * Break up large objects into multiple writes. Otherwise a 128kb VBO 1960 * would overflow the 16 bits of size field in the packet header and 1961 * everything goes badly after that. 1962 */ 1963static void 1964aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype, 1965 uint32_t offset, uint32_t size) 1966{ 1967 uint32_t block_size; 1968 uint32_t sub_offset; 1969 1970 for (sub_offset = 0; sub_offset < size; sub_offset += block_size) { 1971 block_size = size - sub_offset; 1972 1973 if (block_size > 8 * 4096) 1974 block_size = 8 * 4096; 1975 1976 aub_write_trace_block(bo, type, subtype, offset + sub_offset, 1977 block_size); 1978 } 1979} 1980 1981static void 1982aub_write_bo(drm_intel_bo *bo) 1983{ 1984 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 1985 uint32_t offset = 0; 1986 unsigned i; 1987 1988 aub_bo_get_address(bo); 1989 1990 /* Write out each annotated section separately. 
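 * Annotations are expected to have increasing ending_offset values (see
 * drm_intel_bufmgr_gem_set_aub_annotations()); each section is clamped to
 * the size of the bo, and anything left unannotated at the end is written
 * out as AUB_TRACE_TYPE_NOTYPE.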
*/ 1991 for (i = 0; i < bo_gem->aub_annotation_count; ++i) { 1992 drm_intel_aub_annotation *annotation = 1993 &bo_gem->aub_annotations[i]; 1994 uint32_t ending_offset = annotation->ending_offset; 1995 if (ending_offset > bo->size) 1996 ending_offset = bo->size; 1997 if (ending_offset > offset) { 1998 aub_write_large_trace_block(bo, annotation->type, 1999 annotation->subtype, 2000 offset, 2001 ending_offset - offset); 2002 offset = ending_offset; 2003 } 2004 } 2005 2006 /* Write out any remaining unannotated data */ 2007 if (offset < bo->size) { 2008 aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0, 2009 offset, bo->size - offset); 2010 } 2011} 2012 2013/* 2014 * Make a ringbuffer on fly and dump it 2015 */ 2016static void 2017aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem, 2018 uint32_t batch_buffer, int ring_flag) 2019{ 2020 uint32_t ringbuffer[4096]; 2021 int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */ 2022 int ring_count = 0; 2023 2024 if (ring_flag == I915_EXEC_BSD) 2025 ring = AUB_TRACE_TYPE_RING_PRB1; 2026 else if (ring_flag == I915_EXEC_BLT) 2027 ring = AUB_TRACE_TYPE_RING_PRB2; 2028 2029 /* Make a ring buffer to execute our batchbuffer. */ 2030 memset(ringbuffer, 0, sizeof(ringbuffer)); 2031 if (bufmgr_gem->gen >= 8) { 2032 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2); 2033 ringbuffer[ring_count++] = batch_buffer; 2034 ringbuffer[ring_count++] = 0; 2035 } else { 2036 ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START; 2037 ringbuffer[ring_count++] = batch_buffer; 2038 } 2039 2040 /* Write out the ring. This appears to trigger execution of 2041 * the ring in the simulator. 2042 */ 2043 aub_out(bufmgr_gem, 2044 CMD_AUB_TRACE_HEADER_BLOCK | 2045 ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2)); 2046 aub_out(bufmgr_gem, 2047 AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE); 2048 aub_out(bufmgr_gem, 0); /* general/surface subtype */ 2049 aub_out(bufmgr_gem, bufmgr_gem->aub_offset); 2050 aub_out(bufmgr_gem, ring_count * 4); 2051 if (bufmgr_gem->gen >= 8) 2052 aub_out(bufmgr_gem, 0); 2053 2054 /* FIXME: Need some flush operations here? */ 2055 aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4); 2056 2057 /* Update offset pointer */ 2058 bufmgr_gem->aub_offset += 4096; 2059} 2060 2061void 2062drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo, 2063 int x1, int y1, int width, int height, 2064 enum aub_dump_bmp_format format, 2065 int pitch, int offset) 2066{ 2067 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2068 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2069 uint32_t cpp; 2070 2071 switch (format) { 2072 case AUB_DUMP_BMP_FORMAT_8BIT: 2073 cpp = 1; 2074 break; 2075 case AUB_DUMP_BMP_FORMAT_ARGB_4444: 2076 cpp = 2; 2077 break; 2078 case AUB_DUMP_BMP_FORMAT_ARGB_0888: 2079 case AUB_DUMP_BMP_FORMAT_ARGB_8888: 2080 cpp = 4; 2081 break; 2082 default: 2083 printf("Unknown AUB dump format %d\n", format); 2084 return; 2085 } 2086 2087 if (!bufmgr_gem->aub_file) 2088 return; 2089 2090 aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4); 2091 aub_out(bufmgr_gem, (y1 << 16) | x1); 2092 aub_out(bufmgr_gem, 2093 (format << 24) | 2094 (cpp << 19) | 2095 pitch / 4); 2096 aub_out(bufmgr_gem, (height << 16) | width); 2097 aub_out(bufmgr_gem, bo_gem->aub_offset + offset); 2098 aub_out(bufmgr_gem, 2099 ((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) | 2100 ((bo_gem->tiling_mode == I915_TILING_Y) ? 
(1 << 3) : 0)); 2101} 2102 2103static void 2104aub_exec(drm_intel_bo *bo, int ring_flag, int used) 2105{ 2106 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2107 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2108 int i; 2109 bool batch_buffer_needs_annotations; 2110 2111 if (!bufmgr_gem->aub_file) 2112 return; 2113 2114 /* If batch buffer is not annotated, annotate it the best we 2115 * can. 2116 */ 2117 batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0; 2118 if (batch_buffer_needs_annotations) { 2119 drm_intel_aub_annotation annotations[2] = { 2120 { AUB_TRACE_TYPE_BATCH, 0, used }, 2121 { AUB_TRACE_TYPE_NOTYPE, 0, bo->size } 2122 }; 2123 drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2); 2124 } 2125 2126 /* Write out all buffers to AUB memory */ 2127 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2128 aub_write_bo(bufmgr_gem->exec_bos[i]); 2129 } 2130 2131 /* Remove any annotations we added */ 2132 if (batch_buffer_needs_annotations) 2133 drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0); 2134 2135 /* Dump ring buffer */ 2136 aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag); 2137 2138 fflush(bufmgr_gem->aub_file); 2139 2140 /* 2141 * One frame has been dumped. So reset the aub_offset for the next frame. 2142 * 2143 * FIXME: Can we do this? 2144 */ 2145 bufmgr_gem->aub_offset = 0x10000; 2146} 2147 2148static int 2149drm_intel_gem_bo_exec(drm_intel_bo *bo, int used, 2150 drm_clip_rect_t * cliprects, int num_cliprects, int DR4) 2151{ 2152 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2153 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2154 struct drm_i915_gem_execbuffer execbuf; 2155 int ret, i; 2156 2157 if (bo_gem->has_error) 2158 return -ENOMEM; 2159 2160 pthread_mutex_lock(&bufmgr_gem->lock); 2161 /* Update indices and set up the validate list. */ 2162 drm_intel_gem_bo_process_reloc(bo); 2163 2164 /* Add the batch buffer to the validation list. There are no 2165 * relocations pointing to it. 2166 */ 2167 drm_intel_add_validate_buffer(bo); 2168 2169 VG_CLEAR(execbuf); 2170 execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects; 2171 execbuf.buffer_count = bufmgr_gem->exec_count; 2172 execbuf.batch_start_offset = 0; 2173 execbuf.batch_len = used; 2174 execbuf.cliprects_ptr = (uintptr_t) cliprects; 2175 execbuf.num_cliprects = num_cliprects; 2176 execbuf.DR1 = 0; 2177 execbuf.DR4 = DR4; 2178 2179 ret = drmIoctl(bufmgr_gem->fd, 2180 DRM_IOCTL_I915_GEM_EXECBUFFER, 2181 &execbuf); 2182 if (ret != 0) { 2183 ret = -errno; 2184 if (errno == ENOSPC) { 2185 DBG("Execbuffer fails to pin. " 2186 "Estimate: %u. Actual: %u. 
Available: %u\n", 2187 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2188 bufmgr_gem-> 2189 exec_count), 2190 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2191 bufmgr_gem-> 2192 exec_count), 2193 (unsigned int)bufmgr_gem->gtt_size); 2194 } 2195 } 2196 drm_intel_update_buffer_offsets(bufmgr_gem); 2197 2198 if (bufmgr_gem->bufmgr.debug) 2199 drm_intel_gem_dump_validation_list(bufmgr_gem); 2200 2201 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2202 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2203 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2204 2205 /* Disconnect the buffer from the validate list */ 2206 bo_gem->validate_index = -1; 2207 bufmgr_gem->exec_bos[i] = NULL; 2208 } 2209 bufmgr_gem->exec_count = 0; 2210 pthread_mutex_unlock(&bufmgr_gem->lock); 2211 2212 return ret; 2213} 2214 2215static int 2216do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx, 2217 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2218 unsigned int flags) 2219{ 2220 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr; 2221 struct drm_i915_gem_execbuffer2 execbuf; 2222 int ret = 0; 2223 int i; 2224 2225 switch (flags & 0x7) { 2226 default: 2227 return -EINVAL; 2228 case I915_EXEC_BLT: 2229 if (!bufmgr_gem->has_blt) 2230 return -EINVAL; 2231 break; 2232 case I915_EXEC_BSD: 2233 if (!bufmgr_gem->has_bsd) 2234 return -EINVAL; 2235 break; 2236 case I915_EXEC_VEBOX: 2237 if (!bufmgr_gem->has_vebox) 2238 return -EINVAL; 2239 break; 2240 case I915_EXEC_RENDER: 2241 case I915_EXEC_DEFAULT: 2242 break; 2243 } 2244 2245 pthread_mutex_lock(&bufmgr_gem->lock); 2246 /* Update indices and set up the validate list. */ 2247 drm_intel_gem_bo_process_reloc2(bo); 2248 2249 /* Add the batch buffer to the validation list. There are no relocations 2250 * pointing to it. 2251 */ 2252 drm_intel_add_validate_buffer2(bo, 0); 2253 2254 VG_CLEAR(execbuf); 2255 execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects; 2256 execbuf.buffer_count = bufmgr_gem->exec_count; 2257 execbuf.batch_start_offset = 0; 2258 execbuf.batch_len = used; 2259 execbuf.cliprects_ptr = (uintptr_t)cliprects; 2260 execbuf.num_cliprects = num_cliprects; 2261 execbuf.DR1 = 0; 2262 execbuf.DR4 = DR4; 2263 execbuf.flags = flags; 2264 if (ctx == NULL) 2265 i915_execbuffer2_set_context_id(execbuf, 0); 2266 else 2267 i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id); 2268 execbuf.rsvd2 = 0; 2269 2270 aub_exec(bo, flags, used); 2271 2272 if (bufmgr_gem->no_exec) 2273 goto skip_execution; 2274 2275 ret = drmIoctl(bufmgr_gem->fd, 2276 DRM_IOCTL_I915_GEM_EXECBUFFER2, 2277 &execbuf); 2278 if (ret != 0) { 2279 ret = -errno; 2280 if (ret == -ENOSPC) { 2281 DBG("Execbuffer fails to pin. " 2282 "Estimate: %u. Actual: %u. 
Available: %u\n", 2283 drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos, 2284 bufmgr_gem->exec_count), 2285 drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos, 2286 bufmgr_gem->exec_count), 2287 (unsigned int) bufmgr_gem->gtt_size); 2288 } 2289 } 2290 drm_intel_update_buffer_offsets2(bufmgr_gem); 2291 2292skip_execution: 2293 if (bufmgr_gem->bufmgr.debug) 2294 drm_intel_gem_dump_validation_list(bufmgr_gem); 2295 2296 for (i = 0; i < bufmgr_gem->exec_count; i++) { 2297 drm_intel_bo *bo = bufmgr_gem->exec_bos[i]; 2298 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo; 2299 2300 /* Disconnect the buffer from the validate list */ 2301 bo_gem->validate_index = -1; 2302 bufmgr_gem->exec_bos[i] = NULL; 2303 } 2304 bufmgr_gem->exec_count = 0; 2305 pthread_mutex_unlock(&bufmgr_gem->lock); 2306 2307 return ret; 2308} 2309 2310static int 2311drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used, 2312 drm_clip_rect_t *cliprects, int num_cliprects, 2313 int DR4) 2314{ 2315 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2316 I915_EXEC_RENDER); 2317} 2318 2319static int 2320drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used, 2321 drm_clip_rect_t *cliprects, int num_cliprects, int DR4, 2322 unsigned int flags) 2323{ 2324 return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4, 2325 flags); 2326} 2327 2328int 2329drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx, 2330 int used, unsigned int flags) 2331{ 2332 return do_exec2(bo, used, ctx, NULL, 0, 0, flags); 2333} 2334 2335static int 2336drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment) 2337{ 2338 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2339 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2340 struct drm_i915_gem_pin pin; 2341 int ret; 2342 2343 VG_CLEAR(pin); 2344 pin.handle = bo_gem->gem_handle; 2345 pin.alignment = alignment; 2346 2347 ret = drmIoctl(bufmgr_gem->fd, 2348 DRM_IOCTL_I915_GEM_PIN, 2349 &pin); 2350 if (ret != 0) 2351 return -errno; 2352 2353 bo->offset = pin.offset; 2354 return 0; 2355} 2356 2357static int 2358drm_intel_gem_bo_unpin(drm_intel_bo *bo) 2359{ 2360 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2361 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2362 struct drm_i915_gem_unpin unpin; 2363 int ret; 2364 2365 VG_CLEAR(unpin); 2366 unpin.handle = bo_gem->gem_handle; 2367 2368 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); 2369 if (ret != 0) 2370 return -errno; 2371 2372 return 0; 2373} 2374 2375static int 2376drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo, 2377 uint32_t tiling_mode, 2378 uint32_t stride) 2379{ 2380 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2381 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2382 struct drm_i915_gem_set_tiling set_tiling; 2383 int ret; 2384 2385 if (bo_gem->global_name == 0 && 2386 tiling_mode == bo_gem->tiling_mode && 2387 stride == bo_gem->stride) 2388 return 0; 2389 2390 memset(&set_tiling, 0, sizeof(set_tiling)); 2391 do { 2392 /* set_tiling is slightly broken and overwrites the 2393 * input on the error path, so we have to open code 2394 * rmIoctl. 
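 * (That is, call ioctl(2) directly, re-initialise the arguments on every
 * pass of the retry loop, and restart on EINTR/EAGAIN.)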
2395 */ 2396 set_tiling.handle = bo_gem->gem_handle; 2397 set_tiling.tiling_mode = tiling_mode; 2398 set_tiling.stride = stride; 2399 2400 ret = ioctl(bufmgr_gem->fd, 2401 DRM_IOCTL_I915_GEM_SET_TILING, 2402 &set_tiling); 2403 } while (ret == -1 && (errno == EINTR || errno == EAGAIN)); 2404 if (ret == -1) 2405 return -errno; 2406 2407 bo_gem->tiling_mode = set_tiling.tiling_mode; 2408 bo_gem->swizzle_mode = set_tiling.swizzle_mode; 2409 bo_gem->stride = set_tiling.stride; 2410 return 0; 2411} 2412 2413static int 2414drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2415 uint32_t stride) 2416{ 2417 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2418 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2419 int ret; 2420 2421 /* Linear buffers have no stride. By ensuring that we only ever use 2422 * stride 0 with linear buffers, we simplify our code. 2423 */ 2424 if (*tiling_mode == I915_TILING_NONE) 2425 stride = 0; 2426 2427 ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride); 2428 if (ret == 0) 2429 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 2430 2431 *tiling_mode = bo_gem->tiling_mode; 2432 return ret; 2433} 2434 2435static int 2436drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode, 2437 uint32_t * swizzle_mode) 2438{ 2439 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2440 2441 *tiling_mode = bo_gem->tiling_mode; 2442 *swizzle_mode = bo_gem->swizzle_mode; 2443 return 0; 2444} 2445 2446drm_intel_bo * 2447drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size) 2448{ 2449 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2450 int ret; 2451 uint32_t handle; 2452 drm_intel_bo_gem *bo_gem; 2453 struct drm_i915_gem_get_tiling get_tiling; 2454 2455 ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle); 2456 if (ret) { 2457 fprintf(stderr,"ret is %d %d\n", ret, errno); 2458 return NULL; 2459 } 2460 2461 bo_gem = calloc(1, sizeof(*bo_gem)); 2462 if (!bo_gem) 2463 return NULL; 2464 2465 /* Determine size of bo. The fd-to-handle ioctl really should 2466 * return the size, but it doesn't. If we have kernel 3.12 or 2467 * later, we can lseek on the prime fd to get the size. Older 2468 * kernels will just fail, in which case we fall back to the 2469 * provided (estimated or guess size). 
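 * (On kernels that support it, lseek(fd, 0, SEEK_END) on a dma-buf fd
 * returns the object's size in bytes; otherwise it returns -1.)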
*/ 2470 ret = lseek(prime_fd, 0, SEEK_END); 2471 if (ret != -1) 2472 bo_gem->bo.size = ret; 2473 else 2474 bo_gem->bo.size = size; 2475 2476 bo_gem->bo.handle = handle; 2477 bo_gem->bo.bufmgr = bufmgr; 2478 2479 bo_gem->gem_handle = handle; 2480 2481 atomic_set(&bo_gem->refcount, 1); 2482 2483 bo_gem->name = "prime"; 2484 bo_gem->validate_index = -1; 2485 bo_gem->reloc_tree_fences = 0; 2486 bo_gem->used_as_reloc_target = false; 2487 bo_gem->has_error = false; 2488 bo_gem->reusable = false; 2489 2490 DRMINITLISTHEAD(&bo_gem->name_list); 2491 DRMINITLISTHEAD(&bo_gem->vma_list); 2492 2493 VG_CLEAR(get_tiling); 2494 get_tiling.handle = bo_gem->gem_handle; 2495 ret = drmIoctl(bufmgr_gem->fd, 2496 DRM_IOCTL_I915_GEM_GET_TILING, 2497 &get_tiling); 2498 if (ret != 0) { 2499 drm_intel_gem_bo_unreference(&bo_gem->bo); 2500 return NULL; 2501 } 2502 bo_gem->tiling_mode = get_tiling.tiling_mode; 2503 bo_gem->swizzle_mode = get_tiling.swizzle_mode; 2504 /* XXX stride is unknown */ 2505 drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem); 2506 2507 return &bo_gem->bo; 2508} 2509 2510int 2511drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd) 2512{ 2513 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2514 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2515 2516 if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle, 2517 DRM_CLOEXEC, prime_fd) != 0) 2518 return -errno; 2519 2520 bo_gem->reusable = false; 2521 2522 return 0; 2523} 2524 2525static int 2526drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name) 2527{ 2528 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr; 2529 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2530 int ret; 2531 2532 if (!bo_gem->global_name) { 2533 struct drm_gem_flink flink; 2534 2535 VG_CLEAR(flink); 2536 flink.handle = bo_gem->gem_handle; 2537 2538 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink); 2539 if (ret != 0) 2540 return -errno; 2541 2542 bo_gem->global_name = flink.name; 2543 bo_gem->reusable = false; 2544 2545 DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named); 2546 } 2547 2548 *name = bo_gem->global_name; 2549 return 0; 2550} 2551 2552/** 2553 * Enables unlimited caching of buffer objects for reuse. 2554 * 2555 * This is potentially very memory expensive, as the cache at each bucket 2556 * size is only bounded by how many buffers of that size we've managed to have 2557 * in flight at once. 2558 */ 2559void 2560drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr) 2561{ 2562 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr; 2563 2564 bufmgr_gem->bo_reuse = true; 2565} 2566 2567/** 2568 * Enable use of fenced reloc type. 2569 * 2570 * New code should enable this to avoid unnecessary fence register 2571 * allocation. If this option is not enabled, all relocs will have fence 2572 * register allocated. 2573 */ 2574void 2575drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr) 2576{ 2577 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2578 2579 if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2) 2580 bufmgr_gem->fenced_relocs = true; 2581} 2582 2583/** 2584 * Return the additional aperture space required by the tree of buffer objects 2585 * rooted at bo. 
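 *
 * Each buffer is counted at most once per query: the first visit sets
 * included_in_check_aperture, and the flag is cleared again by
 * drm_intel_gem_bo_clear_aperture_space_flag() once the batch-space
 * computation is done.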
2586 */ 2587static int 2588drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo) 2589{ 2590 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2591 int i; 2592 int total = 0; 2593 2594 if (bo == NULL || bo_gem->included_in_check_aperture) 2595 return 0; 2596 2597 total += bo->size; 2598 bo_gem->included_in_check_aperture = true; 2599 2600 for (i = 0; i < bo_gem->reloc_count; i++) 2601 total += 2602 drm_intel_gem_bo_get_aperture_space(bo_gem-> 2603 reloc_target_info[i].bo); 2604 2605 return total; 2606} 2607 2608/** 2609 * Count the number of buffers in this list that need a fence reg 2610 * 2611 * If the count is greater than the number of available regs, we'll have 2612 * to ask the caller to resubmit a batch with fewer tiled buffers. 2613 * 2614 * This function over-counts if the same buffer is used multiple times. 2615 */ 2616static unsigned int 2617drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count) 2618{ 2619 int i; 2620 unsigned int total = 0; 2621 2622 for (i = 0; i < count; i++) { 2623 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2624 2625 if (bo_gem == NULL) 2626 continue; 2627 2628 total += bo_gem->reloc_tree_fences; 2629 } 2630 return total; 2631} 2632 2633/** 2634 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready 2635 * for the next drm_intel_bufmgr_check_aperture_space() call. 2636 */ 2637static void 2638drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo) 2639{ 2640 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2641 int i; 2642 2643 if (bo == NULL || !bo_gem->included_in_check_aperture) 2644 return; 2645 2646 bo_gem->included_in_check_aperture = false; 2647 2648 for (i = 0; i < bo_gem->reloc_count; i++) 2649 drm_intel_gem_bo_clear_aperture_space_flag(bo_gem-> 2650 reloc_target_info[i].bo); 2651} 2652 2653/** 2654 * Return a conservative estimate for the amount of aperture required 2655 * for a collection of buffers. This may double-count some buffers. 2656 */ 2657static unsigned int 2658drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count) 2659{ 2660 int i; 2661 unsigned int total = 0; 2662 2663 for (i = 0; i < count; i++) { 2664 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i]; 2665 if (bo_gem != NULL) 2666 total += bo_gem->reloc_tree_size; 2667 } 2668 return total; 2669} 2670 2671/** 2672 * Return the amount of aperture needed for a collection of buffers. 2673 * This avoids double counting any buffers, at the cost of looking 2674 * at every buffer in the set. 2675 */ 2676static unsigned int 2677drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count) 2678{ 2679 int i; 2680 unsigned int total = 0; 2681 2682 for (i = 0; i < count; i++) { 2683 total += drm_intel_gem_bo_get_aperture_space(bo_array[i]); 2684 /* For the first buffer object in the array, we get an 2685 * accurate count back for its reloc_tree size (since nothing 2686 * had been flagged as being counted yet). We can save that 2687 * value out as a more conservative reloc_tree_size that 2688 * avoids double-counting target buffers. Since the first 2689 * buffer happens to usually be the batch buffer in our 2690 * callers, this can pull us back from doing the tree 2691 * walk on every new batch emit. 
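 * (drm_intel_gem_estimate_batch_space() then just sums these cached
 * reloc_tree_size values, which is why the estimate can over-count
 * shared targets.)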
2692 */ 2693 if (i == 0) { 2694 drm_intel_bo_gem *bo_gem = 2695 (drm_intel_bo_gem *) bo_array[i]; 2696 bo_gem->reloc_tree_size = total; 2697 } 2698 } 2699 2700 for (i = 0; i < count; i++) 2701 drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]); 2702 return total; 2703} 2704 2705/** 2706 * Return -1 if the batchbuffer should be flushed before attempting to 2707 * emit rendering referencing the buffers pointed to by bo_array. 2708 * 2709 * This is required because if we try to emit a batchbuffer with relocations 2710 * to a tree of buffers that won't simultaneously fit in the aperture, 2711 * the rendering will return an error at a point where the software is not 2712 * prepared to recover from it. 2713 * 2714 * However, we also want to emit the batchbuffer significantly before we reach 2715 * the limit, as a series of batchbuffers each of which references buffers 2716 * covering almost all of the aperture means that at each emit we end up 2717 * waiting to evict a buffer from the last rendering, and we get synchronous 2718 * performance. By emitting smaller batchbuffers, we eat some CPU overhead to 2719 * get better parallelism. 2720 */ 2721static int 2722drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count) 2723{ 2724 drm_intel_bufmgr_gem *bufmgr_gem = 2725 (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr; 2726 unsigned int total = 0; 2727 unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4; 2728 int total_fences; 2729 2730 /* Check for fence reg constraints if necessary */ 2731 if (bufmgr_gem->available_fences) { 2732 total_fences = drm_intel_gem_total_fences(bo_array, count); 2733 if (total_fences > bufmgr_gem->available_fences) 2734 return -ENOSPC; 2735 } 2736 2737 total = drm_intel_gem_estimate_batch_space(bo_array, count); 2738 2739 if (total > threshold) 2740 total = drm_intel_gem_compute_batch_space(bo_array, count); 2741 2742 if (total > threshold) { 2743 DBG("check_space: overflowed available aperture, " 2744 "%dkb vs %dkb\n", 2745 total / 1024, (int)bufmgr_gem->gtt_size / 1024); 2746 return -ENOSPC; 2747 } else { 2748 DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024, 2749 (int)bufmgr_gem->gtt_size / 1024); 2750 return 0; 2751 } 2752} 2753 2754/* 2755 * Disable buffer reuse for objects which are shared with the kernel 2756 * as scanout buffers 2757 */ 2758static int 2759drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo) 2760{ 2761 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2762 2763 bo_gem->reusable = false; 2764 return 0; 2765} 2766 2767static int 2768drm_intel_gem_bo_is_reusable(drm_intel_bo *bo) 2769{ 2770 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2771 2772 return bo_gem->reusable; 2773} 2774 2775static int 2776_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2777{ 2778 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 2779 int i; 2780 2781 for (i = 0; i < bo_gem->reloc_count; i++) { 2782 if (bo_gem->reloc_target_info[i].bo == target_bo) 2783 return 1; 2784 if (bo == bo_gem->reloc_target_info[i].bo) 2785 continue; 2786 if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo, 2787 target_bo)) 2788 return 1; 2789 } 2790 2791 return 0; 2792} 2793 2794/** Return true if target_bo is referenced by bo's relocation tree. 
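 * The used_as_reloc_target flag below gives a fast negative answer for
 * buffers that have never been used as a relocation target.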
*/ 2795static int 2796drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo) 2797{ 2798 drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo; 2799 2800 if (bo == NULL || target_bo == NULL) 2801 return 0; 2802 if (target_bo_gem->used_as_reloc_target) 2803 return _drm_intel_gem_bo_references(bo, target_bo); 2804 return 0; 2805} 2806 2807static void 2808add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size) 2809{ 2810 unsigned int i = bufmgr_gem->num_buckets; 2811 2812 assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket)); 2813 2814 DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head); 2815 bufmgr_gem->cache_bucket[i].size = size; 2816 bufmgr_gem->num_buckets++; 2817} 2818 2819static void 2820init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem) 2821{ 2822 unsigned long size, cache_max_size = 64 * 1024 * 1024; 2823 2824 /* OK, so power of two buckets was too wasteful of memory. 2825 * Give 3 other sizes between each power of two, to hopefully 2826 * cover things accurately enough. (The alternative is 2827 * probably to just go for exact matching of sizes, and assume 2828 * that for things like composited window resize the tiled 2829 * width/height alignment and rounding of sizes to pages will 2830 * get us useful cache hit rates anyway) 2831 */ 2832 add_bucket(bufmgr_gem, 4096); 2833 add_bucket(bufmgr_gem, 4096 * 2); 2834 add_bucket(bufmgr_gem, 4096 * 3); 2835 2836 /* Initialize the linked lists for BO reuse cache. */ 2837 for (size = 4 * 4096; size <= cache_max_size; size *= 2) { 2838 add_bucket(bufmgr_gem, size); 2839 2840 add_bucket(bufmgr_gem, size + size * 1 / 4); 2841 add_bucket(bufmgr_gem, size + size * 2 / 4); 2842 add_bucket(bufmgr_gem, size + size * 3 / 4); 2843 } 2844} 2845 2846void 2847drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit) 2848{ 2849 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2850 2851 bufmgr_gem->vma_max = limit; 2852 2853 drm_intel_gem_bo_purge_vma_cache(bufmgr_gem); 2854} 2855 2856/** 2857 * Get the PCI ID for the device. This can be overridden by setting the 2858 * INTEL_DEVID_OVERRIDE environment variable to the desired ID. 2859 */ 2860static int 2861get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem) 2862{ 2863 char *devid_override; 2864 int devid; 2865 int ret; 2866 drm_i915_getparam_t gp; 2867 2868 if (geteuid() == getuid()) { 2869 devid_override = getenv("INTEL_DEVID_OVERRIDE"); 2870 if (devid_override) { 2871 bufmgr_gem->no_exec = true; 2872 return strtod(devid_override, NULL); 2873 } 2874 } 2875 2876 VG_CLEAR(devid); 2877 VG_CLEAR(gp); 2878 gp.param = I915_PARAM_CHIPSET_ID; 2879 gp.value = &devid; 2880 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 2881 if (ret) { 2882 fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno); 2883 fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value); 2884 } 2885 return devid; 2886} 2887 2888int 2889drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr) 2890{ 2891 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2892 2893 return bufmgr_gem->pci_device; 2894} 2895 2896/** 2897 * Sets the AUB filename. 2898 * 2899 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump() 2900 * for it to have any effect. 
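 *
 * Illustrative usage (the name "trace.aub" is only an example):
 *
 *   drm_intel_bufmgr_gem_set_aub_filename(bufmgr, "trace.aub");
 *   drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);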
2901 */ 2902void 2903drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr, 2904 const char *filename) 2905{ 2906 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2907 2908 free(bufmgr_gem->aub_filename); 2909 if (filename) 2910 bufmgr_gem->aub_filename = strdup(filename); 2911} 2912 2913/** 2914 * Sets up AUB dumping. 2915 * 2916 * This is a trace file format that can be used with the simulator. 2917 * Packets are emitted in a format somewhat like GPU command packets. 2918 * You can set up a GTT and upload your objects into the referenced 2919 * space, then send off batchbuffers and get BMPs out the other end. 2920 */ 2921void 2922drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable) 2923{ 2924 drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2925 int entry = 0x200003; 2926 int i; 2927 int gtt_size = 0x10000; 2928 const char *filename; 2929 2930 if (!enable) { 2931 if (bufmgr_gem->aub_file) { 2932 fclose(bufmgr_gem->aub_file); 2933 bufmgr_gem->aub_file = NULL; 2934 } 2935 return; 2936 } 2937 2938 if (geteuid() != getuid()) 2939 return; 2940 2941 if (bufmgr_gem->aub_filename) 2942 filename = bufmgr_gem->aub_filename; 2943 else 2944 filename = "intel.aub"; 2945 bufmgr_gem->aub_file = fopen(filename, "w+"); 2946 if (!bufmgr_gem->aub_file) 2947 return; 2948 2949 /* Start allocating objects from just after the GTT. */ 2950 bufmgr_gem->aub_offset = gtt_size; 2951 2952 /* Start with a (required) version packet. */ 2953 aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2)); 2954 aub_out(bufmgr_gem, 2955 (4 << AUB_HEADER_MAJOR_SHIFT) | 2956 (0 << AUB_HEADER_MINOR_SHIFT)); 2957 for (i = 0; i < 8; i++) { 2958 aub_out(bufmgr_gem, 0); /* app name */ 2959 } 2960 aub_out(bufmgr_gem, 0); /* timestamp */ 2961 aub_out(bufmgr_gem, 0); /* timestamp */ 2962 aub_out(bufmgr_gem, 0); /* comment len */ 2963 2964 /* Set up the GTT. The max we can handle is 256M */ 2965 aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 
6 : 5) - 2)); 2966	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE); 2967	aub_out(bufmgr_gem, 0); /* subtype */ 2968	aub_out(bufmgr_gem, 0); /* offset */ 2969	aub_out(bufmgr_gem, gtt_size); /* size */ 2970	if (bufmgr_gem->gen >= 8) 2971		aub_out(bufmgr_gem, 0); 2972	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) { 2973		aub_out(bufmgr_gem, entry); 2974	} 2975} 2976 2977drm_intel_context * 2978drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr) 2979{ 2980	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 2981	struct drm_i915_gem_context_create create; 2982	drm_intel_context *context = NULL; 2983	int ret; 2984 2985	VG_CLEAR(create); 2986	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create); 2987	if (ret != 0) { 2988		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n", 2989		    strerror(errno)); 2990		return NULL; 2991	} 2992 2993	context = calloc(1, sizeof(*context)); 2994	context->ctx_id = create.ctx_id; 2995	context->bufmgr = bufmgr; 2996 2997	return context; 2998} 2999 3000void 3001drm_intel_gem_context_destroy(drm_intel_context *ctx) 3002{ 3003	drm_intel_bufmgr_gem *bufmgr_gem; 3004	struct drm_i915_gem_context_destroy destroy; 3005	int ret; 3006 3007	if (ctx == NULL) 3008		return; 3009 3010	VG_CLEAR(destroy); 3011 3012	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3013	destroy.ctx_id = ctx->ctx_id; 3014	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY, 3015		       &destroy); 3016	if (ret != 0) 3017		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n", 3018			strerror(errno)); 3019 3020	free(ctx); 3021} 3022 3023int 3024drm_intel_get_reset_stats(drm_intel_context *ctx, 3025			  uint32_t *reset_count, 3026			  uint32_t *active, 3027			  uint32_t *pending) 3028{ 3029	drm_intel_bufmgr_gem *bufmgr_gem; 3030	struct drm_i915_reset_stats stats; 3031	int ret; 3032 3033	if (ctx == NULL) 3034		return -EINVAL; 3035 3036	VG_CLEAR(stats); 3037 3038	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr; 3039	stats.ctx_id = ctx->ctx_id; 3040	ret = drmIoctl(bufmgr_gem->fd, 3041		       DRM_IOCTL_I915_GET_RESET_STATS, 3042		       &stats); 3043	if (ret == 0) { 3044		if (reset_count != NULL) 3045			*reset_count = stats.reset_count; 3046 3047		if (active != NULL) 3048			*active = stats.batch_active; 3049 3050		if (pending != NULL) 3051			*pending = stats.batch_pending; 3052	} 3053 3054	return ret; 3055} 3056 3057int 3058drm_intel_reg_read(drm_intel_bufmgr *bufmgr, 3059		   uint32_t offset, 3060		   uint64_t *result) 3061{ 3062	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr; 3063	struct drm_i915_reg_read reg_read; 3064	int ret; 3065 3066	VG_CLEAR(reg_read); 3067	reg_read.offset = offset; 3068 3069	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read); 3070 3071	*result = reg_read.val; 3072	return ret; 3073} 3074 3075 3076/** 3077 * Annotate the given bo for use in aub dumping. 3078 * 3079 * \param annotations is an array of drm_intel_aub_annotation objects 3080 * describing the type of data in various sections of the bo.  Each 3081 * element of the array specifies the type and subtype of a section of 3082 * the bo, and the past-the-end offset of that section.  The elements 3083 * of \c annotations must be sorted so that ending_offset is 3084 * increasing. 3085 * 3086 * \param count is the number of elements in the \c annotations array. 3087 * If \c count is zero, then \c annotations will not be dereferenced.
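 *
 * Illustrative example, mirroring the defaults applied by this file's
 * aub_exec() when a batch with \c used bytes of commands has no
 * annotations of its own:
 *
 *   drm_intel_aub_annotation notes[2] = {
 *       { AUB_TRACE_TYPE_BATCH,  0, used },
 *       { AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
 *   };
 *   drm_intel_bufmgr_gem_set_aub_annotations(bo, notes, 2);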
3088 * 3089 * Annotations are copied into a private data structure, so caller may 3090 * re-use the memory pointed to by \c annotations after the call 3091 * returns. 3092 * 3093 * Annotations are stored for the lifetime of the bo; to reset to the 3094 * default state (no annotations), call this function with a \c count 3095 * of zero. 3096 */ 3097void 3098drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo, 3099 drm_intel_aub_annotation *annotations, 3100 unsigned count) 3101{ 3102 drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo; 3103 unsigned size = sizeof(*annotations) * count; 3104 drm_intel_aub_annotation *new_annotations = 3105 count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL; 3106 if (new_annotations == NULL) { 3107 free(bo_gem->aub_annotations); 3108 bo_gem->aub_annotations = NULL; 3109 bo_gem->aub_annotation_count = 0; 3110 return; 3111 } 3112 memcpy(new_annotations, annotations, size); 3113 bo_gem->aub_annotations = new_annotations; 3114 bo_gem->aub_annotation_count = count; 3115} 3116 3117/** 3118 * Initializes the GEM buffer manager, which uses the kernel to allocate, map, 3119 * and manage map buffer objections. 3120 * 3121 * \param fd File descriptor of the opened DRM device. 3122 */ 3123drm_intel_bufmgr * 3124drm_intel_bufmgr_gem_init(int fd, int batch_size) 3125{ 3126 drm_intel_bufmgr_gem *bufmgr_gem; 3127 struct drm_i915_gem_get_aperture aperture; 3128 drm_i915_getparam_t gp; 3129 int ret, tmp; 3130 bool exec2 = false; 3131 3132 bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); 3133 if (bufmgr_gem == NULL) 3134 return NULL; 3135 3136 bufmgr_gem->fd = fd; 3137 3138 if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) { 3139 free(bufmgr_gem); 3140 return NULL; 3141 } 3142 3143 ret = drmIoctl(bufmgr_gem->fd, 3144 DRM_IOCTL_I915_GEM_GET_APERTURE, 3145 &aperture); 3146 3147 if (ret == 0) 3148 bufmgr_gem->gtt_size = aperture.aper_available_size; 3149 else { 3150 fprintf(stderr, "DRM_IOCTL_I915_GEM_APERTURE failed: %s\n", 3151 strerror(errno)); 3152 bufmgr_gem->gtt_size = 128 * 1024 * 1024; 3153 fprintf(stderr, "Assuming %dkB available aperture size.\n" 3154 "May lead to reduced performance or incorrect " 3155 "rendering.\n", 3156 (int)bufmgr_gem->gtt_size / 1024); 3157 } 3158 3159 bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem); 3160 3161 if (IS_GEN2(bufmgr_gem->pci_device)) 3162 bufmgr_gem->gen = 2; 3163 else if (IS_GEN3(bufmgr_gem->pci_device)) 3164 bufmgr_gem->gen = 3; 3165 else if (IS_GEN4(bufmgr_gem->pci_device)) 3166 bufmgr_gem->gen = 4; 3167 else if (IS_GEN5(bufmgr_gem->pci_device)) 3168 bufmgr_gem->gen = 5; 3169 else if (IS_GEN6(bufmgr_gem->pci_device)) 3170 bufmgr_gem->gen = 6; 3171 else if (IS_GEN7(bufmgr_gem->pci_device)) 3172 bufmgr_gem->gen = 7; 3173 else if (IS_GEN8(bufmgr_gem->pci_device)) 3174 bufmgr_gem->gen = 8; 3175 else { 3176 free(bufmgr_gem); 3177 return NULL; 3178 } 3179 3180 if (IS_GEN3(bufmgr_gem->pci_device) && 3181 bufmgr_gem->gtt_size > 256*1024*1024) { 3182 /* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't 3183 * be used for tiled blits. To simplify the accounting, just 3184 * substract the unmappable part (fixed to 256MB on all known 3185 * gen3 devices) if the kernel advertises it. 
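 * The reduced gtt_size is the aperture budget that
 * drm_intel_gem_check_aperture_space() works against.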
*/ 3186 bufmgr_gem->gtt_size -= 256*1024*1024; 3187 } 3188 3189 VG_CLEAR(gp); 3190 gp.value = &tmp; 3191 3192 gp.param = I915_PARAM_HAS_EXECBUF2; 3193 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3194 if (!ret) 3195 exec2 = true; 3196 3197 gp.param = I915_PARAM_HAS_BSD; 3198 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3199 bufmgr_gem->has_bsd = ret == 0; 3200 3201 gp.param = I915_PARAM_HAS_BLT; 3202 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3203 bufmgr_gem->has_blt = ret == 0; 3204 3205 gp.param = I915_PARAM_HAS_RELAXED_FENCING; 3206 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3207 bufmgr_gem->has_relaxed_fencing = ret == 0; 3208 3209 gp.param = I915_PARAM_HAS_WAIT_TIMEOUT; 3210 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3211 bufmgr_gem->has_wait_timeout = ret == 0; 3212 3213 gp.param = I915_PARAM_HAS_LLC; 3214 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3215 if (ret != 0) { 3216 /* Kernel does not supports HAS_LLC query, fallback to GPU 3217 * generation detection and assume that we have LLC on GEN6/7 3218 */ 3219 bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) | 3220 IS_GEN7(bufmgr_gem->pci_device)); 3221 } else 3222 bufmgr_gem->has_llc = *gp.value; 3223 3224 gp.param = I915_PARAM_HAS_VEBOX; 3225 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3226 bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0); 3227 3228 if (bufmgr_gem->gen < 4) { 3229 gp.param = I915_PARAM_NUM_FENCES_AVAIL; 3230 gp.value = &bufmgr_gem->available_fences; 3231 ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp); 3232 if (ret) { 3233 fprintf(stderr, "get fences failed: %d [%d]\n", ret, 3234 errno); 3235 fprintf(stderr, "param: %d, val: %d\n", gp.param, 3236 *gp.value); 3237 bufmgr_gem->available_fences = 0; 3238 } else { 3239 /* XXX The kernel reports the total number of fences, 3240 * including any that may be pinned. 3241 * 3242 * We presume that there will be at least one pinned 3243 * fence for the scanout buffer, but there may be more 3244 * than one scanout and the user may be manually 3245 * pinning buffers. Let's move to execbuffer2 and 3246 * thereby forget the insanity of using fences... 3247 */ 3248 bufmgr_gem->available_fences -= 2; 3249 if (bufmgr_gem->available_fences < 0) 3250 bufmgr_gem->available_fences = 0; 3251 } 3252 } 3253 3254 /* Let's go with one relocation per every 2 dwords (but round down a bit 3255 * since a power of two will mean an extra page allocation for the reloc 3256 * buffer). 3257 * 3258 * Every 4 was too few for the blender benchmark. 
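 * For example, a 16384-byte batch yields 16384 / 4 / 2 - 2 = 2046
 * relocation slots.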
3259 */ 3260 bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; 3261 3262 bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc; 3263 bufmgr_gem->bufmgr.bo_alloc_for_render = 3264 drm_intel_gem_bo_alloc_for_render; 3265 bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled; 3266 bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference; 3267 bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference; 3268 bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map; 3269 bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap; 3270 bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata; 3271 bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata; 3272 bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering; 3273 bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc; 3274 bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence; 3275 bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin; 3276 bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin; 3277 bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling; 3278 bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling; 3279 bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink; 3280 /* Use the new one if available */ 3281 if (exec2) { 3282 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2; 3283 bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2; 3284 } else 3285 bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec; 3286 bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy; 3287 bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise; 3288 bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy; 3289 bufmgr_gem->bufmgr.debug = 0; 3290 bufmgr_gem->bufmgr.check_aperture_space = 3291 drm_intel_gem_check_aperture_space; 3292 bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse; 3293 bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable; 3294 bufmgr_gem->bufmgr.get_pipe_from_crtc_id = 3295 drm_intel_gem_get_pipe_from_crtc_id; 3296 bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references; 3297 3298 DRMINITLISTHEAD(&bufmgr_gem->named); 3299 init_cache_buckets(bufmgr_gem); 3300 3301 DRMINITLISTHEAD(&bufmgr_gem->vma_cache); 3302 bufmgr_gem->vma_max = -1; /* unlimited by default */ 3303 3304 return &bufmgr_gem->bufmgr; 3305} 3306
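/*
 * Minimal usage sketch, assuming the standard intel_bufmgr.h entry points;
 * the device path and batch size are illustrative and error handling is
 * omitted:
 *
 *   int fd = open("/dev/dri/card0", O_RDWR);
 *   drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *   drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *
 *   drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *   if (drm_intel_bo_map(bo, 1) == 0) {
 *           memset(bo->virtual, 0, 4096);
 *           drm_intel_bo_unmap(bo);
 *   }
 *
 *   drm_intel_bo_unreference(bo);
 *   drm_intel_bufmgr_destroy(bufmgr);
 *   close(fd);
 */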