/**************************************************************************
 *
 * Copyright © 2007 Red Hat Inc.
 * Copyright © 2007-2012 Intel Corporation
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
 *	    Eric Anholt <eric@anholt.net>
 *	    Dave Airlie <airlied@linux.ie>
 */
36
37#ifdef HAVE_CONFIG_H
38#include "config.h"
39#endif
40
41#include <xf86drm.h>
42#include <xf86atomic.h>
43#include <fcntl.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <assert.h>
49#include <pthread.h>
50#include <sys/ioctl.h>
51#include <sys/stat.h>
52#include <sys/types.h>
53#include <stdbool.h>
54
55#include "errno.h"
56#ifndef ETIME
57#define ETIME ETIMEDOUT
58#endif
59#include "libdrm.h"
60#include "libdrm_lists.h"
61#include "intel_bufmgr.h"
62#include "intel_bufmgr_priv.h"
63#include "intel_chipset.h"
64#include "intel_aub.h"
65#include "string.h"
66
67#include "i915_drm.h"
68
69#ifdef HAVE_VALGRIND
70#include <valgrind.h>
71#include <memcheck.h>
72#define VG(x) x
73#else
74#define VG(x)
75#endif
76
77#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
78
79#define DBG(...) do {					\
80	if (bufmgr_gem->bufmgr.debug)			\
81		fprintf(stderr, __VA_ARGS__);		\
82} while (0)
83
84#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
85
86typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
87
88struct drm_intel_gem_bo_bucket {
89	drmMMListHead head;
90	unsigned long size;
91};
92
93typedef struct _drm_intel_bufmgr_gem {
94	drm_intel_bufmgr bufmgr;
95
96	atomic_t refcount;
97
98	int fd;
99
100	int max_relocs;
101
102	pthread_mutex_t lock;
103
104	struct drm_i915_gem_exec_object *exec_objects;
105	struct drm_i915_gem_exec_object2 *exec2_objects;
106	drm_intel_bo **exec_bos;
107	int exec_size;
108	int exec_count;
109
110	/** Array of lists of cached gem objects of power-of-two sizes */
111	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
112	int num_buckets;
113	time_t time;
114
115	drmMMListHead managers;
116
117	drmMMListHead named;
118	drmMMListHead vma_cache;
119	int vma_count, vma_open, vma_max;
120
121	uint64_t gtt_size;
122	int available_fences;
123	int pci_device;
124	int gen;
125	unsigned int has_bsd : 1;
126	unsigned int has_blt : 1;
127	unsigned int has_relaxed_fencing : 1;
128	unsigned int has_llc : 1;
129	unsigned int has_wait_timeout : 1;
130	unsigned int bo_reuse : 1;
131	unsigned int no_exec : 1;
132	unsigned int has_vebox : 1;
133	bool fenced_relocs;
134
135	char *aub_filename;
136	FILE *aub_file;
137	uint32_t aub_offset;
138} drm_intel_bufmgr_gem;
139
140#define DRM_INTEL_RELOC_FENCE (1<<0)
141
142typedef struct _drm_intel_reloc_target_info {
143	drm_intel_bo *bo;
144	int flags;
145} drm_intel_reloc_target;
146
147struct _drm_intel_bo_gem {
148	drm_intel_bo bo;
149
150	atomic_t refcount;
151	uint32_t gem_handle;
152	const char *name;
153
	/**
	 * Kernel-assigned global name for this object
	 *
	 * List contains both flink named and prime fd'd objects
	 */
159	unsigned int global_name;
160	drmMMListHead name_list;
161
162	/**
163	 * Index of the buffer within the validation list while preparing a
164	 * batchbuffer execution.
165	 */
166	int validate_index;
167
168	/**
169	 * Current tiling mode
170	 */
171	uint32_t tiling_mode;
172	uint32_t swizzle_mode;
173	unsigned long stride;
174
175	time_t free_time;
176
177	/** Array passed to the DRM containing relocation information. */
178	struct drm_i915_gem_relocation_entry *relocs;
179	/**
180	 * Array of info structs corresponding to relocs[i].target_handle etc
181	 */
182	drm_intel_reloc_target *reloc_target_info;
183	/** Number of entries in relocs */
184	int reloc_count;
185	/** Mapped address for the buffer, saved across map/unmap cycles */
186	void *mem_virtual;
187	/** GTT virtual address for the buffer, saved across map/unmap cycles */
188	void *gtt_virtual;
189	/**
190	 * Virtual address of the buffer allocated by user, used for userptr
191	 * objects only.
192	 */
193	void *user_virtual;
194	int map_count;
195	drmMMListHead vma_list;
196
197	/** BO cache list */
198	drmMMListHead head;
199
200	/**
201	 * Boolean of whether this BO and its children have been included in
202	 * the current drm_intel_bufmgr_check_aperture_space() total.
203	 */
204	bool included_in_check_aperture;
205
206	/**
207	 * Boolean of whether this buffer has been used as a relocation
208	 * target and had its size accounted for, and thus can't have any
209	 * further relocations added to it.
210	 */
211	bool used_as_reloc_target;
212
213	/**
214	 * Boolean of whether we have encountered an error whilst building the relocation tree.
215	 */
216	bool has_error;
217
218	/**
219	 * Boolean of whether this buffer can be re-used
220	 */
221	bool reusable;
222
	/**
	 * Boolean of whether the GPU is definitely not accessing the buffer.
	 *
	 * This is only valid when reusable, since non-reusable
	 * buffers are those that have been shared with other
	 * processes, so we don't know their state.
	 */
230	bool idle;
231
232	/**
233	 * Boolean of whether this buffer was allocated with userptr
234	 */
235	bool is_userptr;
236
	/**
	 * Size in bytes of this buffer and its relocation descendants.
	 *
	 * Used to avoid costly tree walking in
	 * drm_intel_bufmgr_check_aperture in the common case.
	 */
243	int reloc_tree_size;
244
245	/**
246	 * Number of potential fence registers required by this buffer and its
247	 * relocations.
248	 */
249	int reloc_tree_fences;
250
	/** Whether we may need to do the SW_FINISH ioctl on unmap. */
252	bool mapped_cpu_write;
253
254	uint32_t aub_offset;
255
256	drm_intel_aub_annotation *aub_annotations;
257	unsigned aub_annotation_count;
258};
259
260static unsigned int
261drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
262
263static unsigned int
264drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
265
266static int
267drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
268			    uint32_t * swizzle_mode);
269
270static int
271drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
272				     uint32_t tiling_mode,
273				     uint32_t stride);
274
275static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
276						      time_t time);
277
278static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
279
280static void drm_intel_gem_bo_free(drm_intel_bo *bo);
281
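/*
 * Compute the allocation size actually needed for a tiled buffer of the
 * requested size: page-aligned on gen4+ (or with relaxed fencing), otherwise
 * the next power of two of at least 512KB (1MB on gen3).  Buffers larger
 * than the fenceable maximum are silently demoted to untiled.
 */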
282static unsigned long
283drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
284			   uint32_t *tiling_mode)
285{
286	unsigned long min_size, max_size;
287	unsigned long i;
288
289	if (*tiling_mode == I915_TILING_NONE)
290		return size;
291
292	/* 965+ just need multiples of page size for tiling */
293	if (bufmgr_gem->gen >= 4)
294		return ROUND_UP_TO(size, 4096);
295
296	/* Older chips need powers of two, of at least 512k or 1M */
297	if (bufmgr_gem->gen == 3) {
298		min_size = 1024*1024;
299		max_size = 128*1024*1024;
300	} else {
301		min_size = 512*1024;
302		max_size = 64*1024*1024;
303	}
304
305	if (size > max_size) {
306		*tiling_mode = I915_TILING_NONE;
307		return size;
308	}
309
310	/* Do we need to allocate every page for the fence? */
311	if (bufmgr_gem->has_relaxed_fencing)
312		return ROUND_UP_TO(size, 4096);
313
314	for (i = min_size; i < size; i <<= 1)
315		;
316
317	return i;
318}
319
320/*
321 * Round a given pitch up to the minimum required for X tiling on a
322 * given chip.  We use 512 as the minimum to allow for a later tiling
323 * change.
324 */
325static unsigned long
326drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
327			    unsigned long pitch, uint32_t *tiling_mode)
328{
329	unsigned long tile_width;
330	unsigned long i;
331
332	/* If untiled, then just align it so that we can do rendering
333	 * to it with the 3D engine.
334	 */
335	if (*tiling_mode == I915_TILING_NONE)
336		return ALIGN(pitch, 64);
337
338	if (*tiling_mode == I915_TILING_X
339			|| (IS_915(bufmgr_gem->pci_device)
340			    && *tiling_mode == I915_TILING_Y))
341		tile_width = 512;
342	else
343		tile_width = 128;
344
345	/* 965 is flexible */
346	if (bufmgr_gem->gen >= 4)
347		return ROUND_UP_TO(pitch, tile_width);
348
	/* The older hardware has a maximum pitch of 8192 with tiled
	 * surfaces, so fall back to untiled if it's too large.
	 */
352	if (pitch > 8192) {
353		*tiling_mode = I915_TILING_NONE;
354		return ALIGN(pitch, 64);
355	}
356
357	/* Pre-965 needs power of two tile width */
358	for (i = tile_width; i < pitch; i <<= 1)
359		;
360
361	return i;
362}
363
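/*
 * Return the smallest cache bucket that can hold the requested size, or
 * NULL if the request is larger than every bucket (in which case the
 * allocation bypasses the BO cache entirely).
 */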
364static struct drm_intel_gem_bo_bucket *
365drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
366				 unsigned long size)
367{
368	int i;
369
370	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
371		struct drm_intel_gem_bo_bucket *bucket =
372		    &bufmgr_gem->cache_bucket[i];
373		if (bucket->size >= size) {
374			return bucket;
375		}
376	}
377
378	return NULL;
379}
380
381static void
382drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
383{
384	int i, j;
385
386	for (i = 0; i < bufmgr_gem->exec_count; i++) {
387		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
388		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
389
390		if (bo_gem->relocs == NULL) {
391			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
392			    bo_gem->name);
393			continue;
394		}
395
396		for (j = 0; j < bo_gem->reloc_count; j++) {
397			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
398			drm_intel_bo_gem *target_gem =
399			    (drm_intel_bo_gem *) target_bo;
400
401			DBG("%2d: %d (%s)@0x%08llx -> "
402			    "%d (%s)@0x%08lx + 0x%08x\n",
403			    i,
404			    bo_gem->gem_handle, bo_gem->name,
405			    (unsigned long long)bo_gem->relocs[j].offset,
406			    target_gem->gem_handle,
407			    target_gem->name,
408			    target_bo->offset64,
409			    bo_gem->relocs[j].delta);
410		}
411	}
412}
413
414static inline void
415drm_intel_gem_bo_reference(drm_intel_bo *bo)
416{
417	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
418
419	atomic_inc(&bo_gem->refcount);
420}
421
422/**
423 * Adds the given buffer to the list of buffers to be validated (moved into the
424 * appropriate memory type) with the next batch submission.
425 *
426 * If a buffer is validated multiple times in a batch submission, it ends up
427 * with the intersection of the memory type flags and the union of the
428 * access flags.
429 */
430static void
431drm_intel_add_validate_buffer(drm_intel_bo *bo)
432{
433	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
434	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
435	int index;
436
437	if (bo_gem->validate_index != -1)
438		return;
439
440	/* Extend the array of validation entries as necessary. */
441	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
442		int new_size = bufmgr_gem->exec_size * 2;
443
444		if (new_size == 0)
445			new_size = 5;
446
447		bufmgr_gem->exec_objects =
448		    realloc(bufmgr_gem->exec_objects,
449			    sizeof(*bufmgr_gem->exec_objects) * new_size);
450		bufmgr_gem->exec_bos =
451		    realloc(bufmgr_gem->exec_bos,
452			    sizeof(*bufmgr_gem->exec_bos) * new_size);
453		bufmgr_gem->exec_size = new_size;
454	}
455
456	index = bufmgr_gem->exec_count;
457	bo_gem->validate_index = index;
458	/* Fill in array entry */
459	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
460	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
461	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
462	bufmgr_gem->exec_objects[index].alignment = 0;
463	bufmgr_gem->exec_objects[index].offset = 0;
464	bufmgr_gem->exec_bos[index] = bo;
465	bufmgr_gem->exec_count++;
466}
467
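/*
 * Like drm_intel_add_validate_buffer(), but fills a drm_i915_gem_exec_object2
 * entry for the execbuffer2 path and can mark the object as needing a fence
 * register via EXEC_OBJECT_NEEDS_FENCE.
 */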
468static void
469drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
470{
471	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
472	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
473	int index;
474
475	if (bo_gem->validate_index != -1) {
476		if (need_fence)
477			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
478				EXEC_OBJECT_NEEDS_FENCE;
479		return;
480	}
481
482	/* Extend the array of validation entries as necessary. */
483	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
484		int new_size = bufmgr_gem->exec_size * 2;
485
486		if (new_size == 0)
487			new_size = 5;
488
489		bufmgr_gem->exec2_objects =
490			realloc(bufmgr_gem->exec2_objects,
491				sizeof(*bufmgr_gem->exec2_objects) * new_size);
492		bufmgr_gem->exec_bos =
493			realloc(bufmgr_gem->exec_bos,
494				sizeof(*bufmgr_gem->exec_bos) * new_size);
495		bufmgr_gem->exec_size = new_size;
496	}
497
498	index = bufmgr_gem->exec_count;
499	bo_gem->validate_index = index;
500	/* Fill in array entry */
501	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
502	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
503	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
504	bufmgr_gem->exec2_objects[index].alignment = 0;
505	bufmgr_gem->exec2_objects[index].offset = 0;
506	bufmgr_gem->exec_bos[index] = bo;
507	bufmgr_gem->exec2_objects[index].flags = 0;
508	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
509	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
510	if (need_fence) {
511		bufmgr_gem->exec2_objects[index].flags |=
512			EXEC_OBJECT_NEEDS_FENCE;
513	}
514	bufmgr_gem->exec_count++;
515}
516
517#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
518	sizeof(uint32_t))
519
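/*
 * Precompute the worst-case aperture footprint of this BO so that
 * drm_intel_bufmgr_check_aperture_space() can sum sizes without walking the
 * relocation tree.  Must be called before the BO is used as a relocation
 * target.
 */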
520static void
521drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
522				      drm_intel_bo_gem *bo_gem)
523{
524	int size;
525
526	assert(!bo_gem->used_as_reloc_target);
527
	/* The older chipsets are far less flexible in terms of tiling,
	 * and require tiled buffers to be size-aligned in the aperture.
	 * This means that in the worst possible case we will need a hole
	 * twice as large as the object in order for it to fit into the
	 * aperture. Optimal packing is for wimps.
	 */
534	size = bo_gem->bo.size;
535	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
536		int min_size;
537
538		if (bufmgr_gem->has_relaxed_fencing) {
539			if (bufmgr_gem->gen == 3)
540				min_size = 1024*1024;
541			else
542				min_size = 512*1024;
543
544			while (min_size < size)
545				min_size *= 2;
546		} else
547			min_size = size;
548
549		/* Account for worst-case alignment. */
550		size = 2 * min_size;
551	}
552
553	bo_gem->reloc_tree_size = size;
554}
555
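/*
 * Lazily allocate the relocation array and the matching target-info array
 * for a BO, capping the entry count at one relocation per four bytes of
 * buffer.  Returns non-zero and marks the BO with has_error if either
 * allocation fails.
 */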
556static int
557drm_intel_setup_reloc_list(drm_intel_bo *bo)
558{
559	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
560	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
561	unsigned int max_relocs = bufmgr_gem->max_relocs;
562
563	if (bo->size / 4 < max_relocs)
564		max_relocs = bo->size / 4;
565
566	bo_gem->relocs = malloc(max_relocs *
567				sizeof(struct drm_i915_gem_relocation_entry));
568	bo_gem->reloc_target_info = malloc(max_relocs *
569					   sizeof(drm_intel_reloc_target));
570	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
571		bo_gem->has_error = true;
572
573		free (bo_gem->relocs);
574		bo_gem->relocs = NULL;
575
576		free (bo_gem->reloc_target_info);
577		bo_gem->reloc_target_info = NULL;
578
579		return 1;
580	}
581
582	return 0;
583}
584
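/*
 * Ask the kernel whether the GPU is still using this BO.  For reusable
 * buffers a previously observed idle state short-circuits the ioctl, since
 * an idle buffer cannot become busy again without a new execbuffer.
 */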
585static int
586drm_intel_gem_bo_busy(drm_intel_bo *bo)
587{
588	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
589	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
590	struct drm_i915_gem_busy busy;
591	int ret;
592
593	if (bo_gem->reusable && bo_gem->idle)
594		return false;
595
596	VG_CLEAR(busy);
597	busy.handle = bo_gem->gem_handle;
598
599	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
	if (ret == 0) {
		bo_gem->idle = !busy.busy;
		return busy.busy;
	} else {
		return false;
	}
607}
608
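/*
 * Tell the kernel whether the backing pages of a cached BO may be discarded
 * under memory pressure (I915_MADV_DONTNEED) or are needed again
 * (I915_MADV_WILLNEED).  Returns whether the pages are still resident.
 */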
609static int
610drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
611				  drm_intel_bo_gem *bo_gem, int state)
612{
613	struct drm_i915_gem_madvise madv;
614
615	VG_CLEAR(madv);
616	madv.handle = bo_gem->gem_handle;
617	madv.madv = state;
618	madv.retained = 1;
619	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
620
621	return madv.retained;
622}
623
624static int
625drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
626{
627	return drm_intel_gem_bo_madvise_internal
628		((drm_intel_bufmgr_gem *) bo->bufmgr,
629		 (drm_intel_bo_gem *) bo,
630		 madv);
631}
632
633/* drop the oldest entries that have been purged by the kernel */
634static void
635drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
636				    struct drm_intel_gem_bo_bucket *bucket)
637{
638	while (!DRMLISTEMPTY(&bucket->head)) {
639		drm_intel_bo_gem *bo_gem;
640
641		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
642				      bucket->head.next, head);
643		if (drm_intel_gem_bo_madvise_internal
644		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
645			break;
646
647		DRMLISTDEL(&bo_gem->head);
648		drm_intel_gem_bo_free(&bo_gem->bo);
649	}
650}
651
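/*
 * Common allocation path: satisfy the request from the per-size BO cache
 * when possible (most-recently-freed entry for render targets, otherwise the
 * oldest entry provided it is idle), falling back to a fresh GEM object.
 * The requested tiling is applied before the buffer is returned.
 */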
652static drm_intel_bo *
653drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
654				const char *name,
655				unsigned long size,
656				unsigned long flags,
657				uint32_t tiling_mode,
658				unsigned long stride)
659{
660	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
661	drm_intel_bo_gem *bo_gem;
662	unsigned int page_size = getpagesize();
663	int ret;
664	struct drm_intel_gem_bo_bucket *bucket;
665	bool alloc_from_cache;
666	unsigned long bo_size;
667	bool for_render = false;
668
669	if (flags & BO_ALLOC_FOR_RENDER)
670		for_render = true;
671
672	/* Round the allocated size up to a power of two number of pages. */
673	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
674
675	/* If we don't have caching at this size, don't actually round the
676	 * allocation up.
677	 */
678	if (bucket == NULL) {
679		bo_size = size;
680		if (bo_size < page_size)
681			bo_size = page_size;
682	} else {
683		bo_size = bucket->size;
684	}
685
686	pthread_mutex_lock(&bufmgr_gem->lock);
687	/* Get a buffer out of the cache if available */
688retry:
689	alloc_from_cache = false;
690	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
691		if (for_render) {
692			/* Allocate new render-target BOs from the tail (MRU)
693			 * of the list, as it will likely be hot in the GPU
694			 * cache and in the aperture for us.
695			 */
696			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
697					      bucket->head.prev, head);
698			DRMLISTDEL(&bo_gem->head);
699			alloc_from_cache = true;
700		} else {
701			/* For non-render-target BOs (where we're probably
702			 * going to map it first thing in order to fill it
703			 * with data), check if the last BO in the cache is
704			 * unbusy, and only reuse in that case. Otherwise,
705			 * allocating a new buffer is probably faster than
706			 * waiting for the GPU to finish.
707			 */
708			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
709					      bucket->head.next, head);
710			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
711				alloc_from_cache = true;
712				DRMLISTDEL(&bo_gem->head);
713			}
714		}
715
716		if (alloc_from_cache) {
717			if (!drm_intel_gem_bo_madvise_internal
718			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
719				drm_intel_gem_bo_free(&bo_gem->bo);
720				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
721								    bucket);
722				goto retry;
723			}
724
725			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
726								 tiling_mode,
727								 stride)) {
728				drm_intel_gem_bo_free(&bo_gem->bo);
729				goto retry;
730			}
731		}
732	}
733	pthread_mutex_unlock(&bufmgr_gem->lock);
734
735	if (!alloc_from_cache) {
736		struct drm_i915_gem_create create;
737
738		bo_gem = calloc(1, sizeof(*bo_gem));
739		if (!bo_gem)
740			return NULL;
741
742		bo_gem->bo.size = bo_size;
743
744		VG_CLEAR(create);
745		create.size = bo_size;
746
747		ret = drmIoctl(bufmgr_gem->fd,
748			       DRM_IOCTL_I915_GEM_CREATE,
749			       &create);
750		bo_gem->gem_handle = create.handle;
751		bo_gem->bo.handle = bo_gem->gem_handle;
752		if (ret != 0) {
753			free(bo_gem);
754			return NULL;
755		}
756		bo_gem->bo.bufmgr = bufmgr;
757
758		bo_gem->tiling_mode = I915_TILING_NONE;
759		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
760		bo_gem->stride = 0;
761
		/* drm_intel_gem_bo_free() calls DRMLISTDEL() on vma_list, so
		 * initialize the list heads here before any error path can
		 * free the BO. */
764		DRMINITLISTHEAD(&bo_gem->name_list);
765		DRMINITLISTHEAD(&bo_gem->vma_list);
766		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
767							 tiling_mode,
768							 stride)) {
769		    drm_intel_gem_bo_free(&bo_gem->bo);
770		    return NULL;
771		}
772	}
773
774	bo_gem->name = name;
775	atomic_set(&bo_gem->refcount, 1);
776	bo_gem->validate_index = -1;
777	bo_gem->reloc_tree_fences = 0;
778	bo_gem->used_as_reloc_target = false;
779	bo_gem->has_error = false;
780	bo_gem->reusable = true;
781	bo_gem->aub_annotations = NULL;
782	bo_gem->aub_annotation_count = 0;
783
784	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
785
786	DBG("bo_create: buf %d (%s) %ldb\n",
787	    bo_gem->gem_handle, bo_gem->name, size);
788
789	return &bo_gem->bo;
790}
791
792static drm_intel_bo *
793drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
794				  const char *name,
795				  unsigned long size,
796				  unsigned int alignment)
797{
798	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
799					       BO_ALLOC_FOR_RENDER,
800					       I915_TILING_NONE, 0);
801}
802
803static drm_intel_bo *
804drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
805		       const char *name,
806		       unsigned long size,
807		       unsigned int alignment)
808{
809	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
810					       I915_TILING_NONE, 0);
811}
812
813static drm_intel_bo *
814drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
815			     int x, int y, int cpp, uint32_t *tiling_mode,
816			     unsigned long *pitch, unsigned long flags)
817{
818	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
819	unsigned long size, stride;
820	uint32_t tiling;
821
822	do {
823		unsigned long aligned_y, height_alignment;
824
825		tiling = *tiling_mode;
826
827		/* If we're tiled, our allocations are in 8 or 32-row blocks,
828		 * so failure to align our height means that we won't allocate
829		 * enough pages.
830		 *
831		 * If we're untiled, we still have to align to 2 rows high
832		 * because the data port accesses 2x2 blocks even if the
833		 * bottom row isn't to be rendered, so failure to align means
834		 * we could walk off the end of the GTT and fault.  This is
835		 * documented on 965, and may be the case on older chipsets
836		 * too so we try to be careful.
837		 */
838		aligned_y = y;
839		height_alignment = 2;
840
841		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
842			height_alignment = 16;
843		else if (tiling == I915_TILING_X
844			|| (IS_915(bufmgr_gem->pci_device)
845			    && tiling == I915_TILING_Y))
846			height_alignment = 8;
847		else if (tiling == I915_TILING_Y)
848			height_alignment = 32;
849		aligned_y = ALIGN(y, height_alignment);
850
851		stride = x * cpp;
852		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
853		size = stride * aligned_y;
854		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
855	} while (*tiling_mode != tiling);
856	*pitch = stride;
857
858	if (tiling == I915_TILING_NONE)
859		stride = 0;
860
861	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
862					       tiling, stride);
863}
864
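/*
 * Wrap an existing user allocation in a GEM handle via the userptr ioctl.
 * The resulting BO is never tiled, is not reusable through the BO cache,
 * and maps back to the caller's own pointer.
 */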
865static drm_intel_bo *
866drm_intel_gem_bo_alloc_userptr(drm_intel_bufmgr *bufmgr,
867				const char *name,
868				void *addr,
869				uint32_t tiling_mode,
870				uint32_t stride,
871				unsigned long size,
872				unsigned long flags)
873{
874	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
875	drm_intel_bo_gem *bo_gem;
876	int ret;
877	struct drm_i915_gem_userptr userptr;
878
	/* Tiling with userptr surfaces is not supported
	 * on all hardware, so refuse it for the time being.
	 */
882	if (tiling_mode != I915_TILING_NONE)
883		return NULL;
884
885	bo_gem = calloc(1, sizeof(*bo_gem));
886	if (!bo_gem)
887		return NULL;
888
889	bo_gem->bo.size = size;
890
891	VG_CLEAR(userptr);
892	userptr.user_ptr = (__u64)((unsigned long)addr);
893	userptr.user_size = size;
894	userptr.flags = flags;
895
896	ret = drmIoctl(bufmgr_gem->fd,
897			DRM_IOCTL_I915_GEM_USERPTR,
898			&userptr);
899	if (ret != 0) {
900		DBG("bo_create_userptr: "
901		    "ioctl failed with user ptr %p size 0x%lx, "
902		    "user flags 0x%lx\n", addr, size, flags);
903		free(bo_gem);
904		return NULL;
905	}
906
907	bo_gem->gem_handle = userptr.handle;
908	bo_gem->bo.handle = bo_gem->gem_handle;
909	bo_gem->bo.bufmgr    = bufmgr;
910	bo_gem->is_userptr   = true;
911	bo_gem->bo.virtual   = addr;
912	/* Save the address provided by user */
913	bo_gem->user_virtual = addr;
914	bo_gem->tiling_mode  = I915_TILING_NONE;
915	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
916	bo_gem->stride       = 0;
917
918	DRMINITLISTHEAD(&bo_gem->name_list);
919	DRMINITLISTHEAD(&bo_gem->vma_list);
920
921	bo_gem->name = name;
922	atomic_set(&bo_gem->refcount, 1);
923	bo_gem->validate_index = -1;
924	bo_gem->reloc_tree_fences = 0;
925	bo_gem->used_as_reloc_target = false;
926	bo_gem->has_error = false;
927	bo_gem->reusable = false;
928
929	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
930
931	DBG("bo_create_userptr: "
932	    "ptr %p buf %d (%s) size %ldb, stride 0x%x, tile mode %d\n",
933		addr, bo_gem->gem_handle, bo_gem->name,
934		size, stride, tiling_mode);
935
936	return &bo_gem->bo;
937}
938
939/**
940 * Returns a drm_intel_bo wrapping the given buffer object handle.
941 *
942 * This can be used when one application needs to pass a buffer object
943 * to another.
944 */
945drm_public drm_intel_bo *
946drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
947				  const char *name,
948				  unsigned int handle)
949{
950	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
951	drm_intel_bo_gem *bo_gem;
952	int ret;
953	struct drm_gem_open open_arg;
954	struct drm_i915_gem_get_tiling get_tiling;
955	drmMMListHead *list;
956
	/* At the moment most applications only have a few named bos.
	 * For instance, in a DRI client only the render buffers passed
	 * between X and the client are named. And since X returns the
	 * alternating names for the front/back buffers, a linear search
	 * provides a sufficiently fast match.
	 */
963	pthread_mutex_lock(&bufmgr_gem->lock);
964	for (list = bufmgr_gem->named.next;
965	     list != &bufmgr_gem->named;
966	     list = list->next) {
967		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
968		if (bo_gem->global_name == handle) {
969			drm_intel_gem_bo_reference(&bo_gem->bo);
970			pthread_mutex_unlock(&bufmgr_gem->lock);
971			return &bo_gem->bo;
972		}
973	}
974
975	VG_CLEAR(open_arg);
976	open_arg.name = handle;
977	ret = drmIoctl(bufmgr_gem->fd,
978		       DRM_IOCTL_GEM_OPEN,
979		       &open_arg);
980	if (ret != 0) {
981		DBG("Couldn't reference %s handle 0x%08x: %s\n",
982		    name, handle, strerror(errno));
983		pthread_mutex_unlock(&bufmgr_gem->lock);
984		return NULL;
985	}
	/* Now see if someone has used a prime handle to get this
	 * object from the kernel before by looking through the list
	 * again for a matching gem_handle.
	 */
990	for (list = bufmgr_gem->named.next;
991	     list != &bufmgr_gem->named;
992	     list = list->next) {
993		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
994		if (bo_gem->gem_handle == open_arg.handle) {
995			drm_intel_gem_bo_reference(&bo_gem->bo);
996			pthread_mutex_unlock(&bufmgr_gem->lock);
997			return &bo_gem->bo;
998		}
999	}
1000
1001	bo_gem = calloc(1, sizeof(*bo_gem));
1002	if (!bo_gem) {
1003		pthread_mutex_unlock(&bufmgr_gem->lock);
1004		return NULL;
1005	}
1006
1007	bo_gem->bo.size = open_arg.size;
1008	bo_gem->bo.offset = 0;
1009	bo_gem->bo.offset64 = 0;
1010	bo_gem->bo.virtual = NULL;
1011	bo_gem->bo.bufmgr = bufmgr;
1012	bo_gem->name = name;
1013	atomic_set(&bo_gem->refcount, 1);
1014	bo_gem->validate_index = -1;
1015	bo_gem->gem_handle = open_arg.handle;
1016	bo_gem->bo.handle = open_arg.handle;
1017	bo_gem->global_name = handle;
1018	bo_gem->reusable = false;
1019
1020	VG_CLEAR(get_tiling);
1021	get_tiling.handle = bo_gem->gem_handle;
1022	ret = drmIoctl(bufmgr_gem->fd,
1023		       DRM_IOCTL_I915_GEM_GET_TILING,
1024		       &get_tiling);
1025	if (ret != 0) {
1026		drm_intel_gem_bo_unreference(&bo_gem->bo);
1027		pthread_mutex_unlock(&bufmgr_gem->lock);
1028		return NULL;
1029	}
1030	bo_gem->tiling_mode = get_tiling.tiling_mode;
1031	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
1032	/* XXX stride is unknown */
1033	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
1034
1035	DRMINITLISTHEAD(&bo_gem->vma_list);
1036	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1037	pthread_mutex_unlock(&bufmgr_gem->lock);
1038	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
1039
1040	return &bo_gem->bo;
1041}
1042
1043static void
1044drm_intel_gem_bo_free(drm_intel_bo *bo)
1045{
1046	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1047	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1048	struct drm_gem_close close;
1049	int ret;
1050
1051	DRMLISTDEL(&bo_gem->vma_list);
1052	if (bo_gem->mem_virtual) {
1053		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
1054		drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1055		bufmgr_gem->vma_count--;
1056	}
1057	if (bo_gem->gtt_virtual) {
1058		drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1059		bufmgr_gem->vma_count--;
1060	}
1061
1062	/* Close this object */
1063	VG_CLEAR(close);
1064	close.handle = bo_gem->gem_handle;
1065	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
1066	if (ret != 0) {
1067		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
1068		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
1069	}
1070	free(bo_gem->aub_annotations);
1071	free(bo);
1072}
1073
1074static void
1075drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
1076{
1077#if HAVE_VALGRIND
1078	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1079
1080	if (bo_gem->mem_virtual)
1081		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
1082
1083	if (bo_gem->gtt_virtual)
1084		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
1085#endif
1086}
1087
1088/** Frees all cached buffers significantly older than @time. */
1089static void
1090drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
1091{
1092	int i;
1093
1094	if (bufmgr_gem->time == time)
1095		return;
1096
1097	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1098		struct drm_intel_gem_bo_bucket *bucket =
1099		    &bufmgr_gem->cache_bucket[i];
1100
1101		while (!DRMLISTEMPTY(&bucket->head)) {
1102			drm_intel_bo_gem *bo_gem;
1103
1104			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1105					      bucket->head.next, head);
1106			if (time - bo_gem->free_time <= 1)
1107				break;
1108
1109			DRMLISTDEL(&bo_gem->head);
1110
1111			drm_intel_gem_bo_free(&bo_gem->bo);
1112		}
1113	}
1114
1115	bufmgr_gem->time = time;
1116}
1117
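/*
 * Trim cached CPU/GTT mappings so that the total number of live mmaps stays
 * below vma_max, leaving headroom for the mappings that are currently open.
 * A negative vma_max disables the limit.
 */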
1118static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1119{
1120	int limit;
1121
1122	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1123	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1124
1125	if (bufmgr_gem->vma_max < 0)
1126		return;
1127
1128	/* We may need to evict a few entries in order to create new mmaps */
1129	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1130	if (limit < 0)
1131		limit = 0;
1132
1133	while (bufmgr_gem->vma_count > limit) {
1134		drm_intel_bo_gem *bo_gem;
1135
1136		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1137				      bufmgr_gem->vma_cache.next,
1138				      vma_list);
1139		assert(bo_gem->map_count == 0);
1140		DRMLISTDELINIT(&bo_gem->vma_list);
1141
1142		if (bo_gem->mem_virtual) {
1143			drm_munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1144			bo_gem->mem_virtual = NULL;
1145			bufmgr_gem->vma_count--;
1146		}
1147		if (bo_gem->gtt_virtual) {
1148			drm_munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1149			bo_gem->gtt_virtual = NULL;
1150			bufmgr_gem->vma_count--;
1151		}
1152	}
1153}
1154
1155static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1156				       drm_intel_bo_gem *bo_gem)
1157{
1158	bufmgr_gem->vma_open--;
1159	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1160	if (bo_gem->mem_virtual)
1161		bufmgr_gem->vma_count++;
1162	if (bo_gem->gtt_virtual)
1163		bufmgr_gem->vma_count++;
1164	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1165}
1166
1167static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1168				      drm_intel_bo_gem *bo_gem)
1169{
1170	bufmgr_gem->vma_open++;
1171	DRMLISTDEL(&bo_gem->vma_list);
1172	if (bo_gem->mem_virtual)
1173		bufmgr_gem->vma_count--;
1174	if (bo_gem->gtt_virtual)
1175		bufmgr_gem->vma_count--;
1176	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1177}
1178
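/*
 * Final unreference (bufmgr lock held): drop references to relocation
 * targets, release any leftover mappings, and either return the BO to its
 * size bucket with I915_MADV_DONTNEED or free it outright.
 */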
1179static void
1180drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1181{
1182	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1183	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1184	struct drm_intel_gem_bo_bucket *bucket;
1185	int i;
1186
1187	/* Unreference all the target buffers */
1188	for (i = 0; i < bo_gem->reloc_count; i++) {
1189		if (bo_gem->reloc_target_info[i].bo != bo) {
1190			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1191								  reloc_target_info[i].bo,
1192								  time);
1193		}
1194	}
1195	bo_gem->reloc_count = 0;
1196	bo_gem->used_as_reloc_target = false;
1197
1198	DBG("bo_unreference final: %d (%s)\n",
1199	    bo_gem->gem_handle, bo_gem->name);
1200
1201	/* release memory associated with this object */
1202	if (bo_gem->reloc_target_info) {
1203		free(bo_gem->reloc_target_info);
1204		bo_gem->reloc_target_info = NULL;
1205	}
1206	if (bo_gem->relocs) {
1207		free(bo_gem->relocs);
1208		bo_gem->relocs = NULL;
1209	}
1210
1211	/* Clear any left-over mappings */
1212	if (bo_gem->map_count) {
1213		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1214		bo_gem->map_count = 0;
1215		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1216		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1217	}
1218
1219	DRMLISTDEL(&bo_gem->name_list);
1220
1221	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1222	/* Put the buffer into our internal cache for reuse if we can. */
1223	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1224	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1225					      I915_MADV_DONTNEED)) {
1226		bo_gem->free_time = time;
1227
1228		bo_gem->name = NULL;
1229		bo_gem->validate_index = -1;
1230
1231		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1232	} else {
1233		drm_intel_gem_bo_free(bo);
1234	}
1235}
1236
1237static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1238						      time_t time)
1239{
1240	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1241
1242	assert(atomic_read(&bo_gem->refcount) > 0);
1243	if (atomic_dec_and_test(&bo_gem->refcount))
1244		drm_intel_gem_bo_unreference_final(bo, time);
1245}
1246
1247static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1248{
1249	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1250
1251	assert(atomic_read(&bo_gem->refcount) > 0);
1252
1253	if (atomic_add_unless(&bo_gem->refcount, -1, 1)) {
1254		drm_intel_bufmgr_gem *bufmgr_gem =
1255		    (drm_intel_bufmgr_gem *) bo->bufmgr;
1256		struct timespec time;
1257
1258		clock_gettime(CLOCK_MONOTONIC, &time);
1259
1260		pthread_mutex_lock(&bufmgr_gem->lock);
1261
1262		if (atomic_dec_and_test(&bo_gem->refcount)) {
1263			drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
1264			drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1265		}
1266
1267		pthread_mutex_unlock(&bufmgr_gem->lock);
1268	}
1269}
1270
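/*
 * Map the BO for CPU access.  userptr objects simply hand back the user's
 * pointer; everything else is mapped through DRM_IOCTL_I915_GEM_MMAP (the
 * mapping is cached across map/unmap cycles) and then moved to the CPU
 * domain, optionally for writing.
 */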
1271static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1272{
1273	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1274	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1275	struct drm_i915_gem_set_domain set_domain;
1276	int ret;
1277
1278	if (bo_gem->is_userptr) {
1279		/* Return the same user ptr */
1280		bo->virtual = bo_gem->user_virtual;
1281		return 0;
1282	}
1283
1284	pthread_mutex_lock(&bufmgr_gem->lock);
1285
1286	if (bo_gem->map_count++ == 0)
1287		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1288
1289	if (!bo_gem->mem_virtual) {
1290		struct drm_i915_gem_mmap mmap_arg;
1291
1292		DBG("bo_map: %d (%s), map_count=%d\n",
1293		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1294
1295		VG_CLEAR(mmap_arg);
1296		mmap_arg.handle = bo_gem->gem_handle;
1297		mmap_arg.offset = 0;
1298		mmap_arg.size = bo->size;
1299		ret = drmIoctl(bufmgr_gem->fd,
1300			       DRM_IOCTL_I915_GEM_MMAP,
1301			       &mmap_arg);
1302		if (ret != 0) {
1303			ret = -errno;
1304			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1305			    __FILE__, __LINE__, bo_gem->gem_handle,
1306			    bo_gem->name, strerror(errno));
1307			if (--bo_gem->map_count == 0)
1308				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1309			pthread_mutex_unlock(&bufmgr_gem->lock);
1310			return ret;
1311		}
1312		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1313		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1314	}
1315	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1316	    bo_gem->mem_virtual);
1317	bo->virtual = bo_gem->mem_virtual;
1318
1319	VG_CLEAR(set_domain);
1320	set_domain.handle = bo_gem->gem_handle;
1321	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1322	if (write_enable)
1323		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1324	else
1325		set_domain.write_domain = 0;
1326	ret = drmIoctl(bufmgr_gem->fd,
1327		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1328		       &set_domain);
1329	if (ret != 0) {
1330		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1331		    __FILE__, __LINE__, bo_gem->gem_handle,
1332		    strerror(errno));
1333	}
1334
1335	if (write_enable)
1336		bo_gem->mapped_cpu_write = true;
1337
1338	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1339	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1340	pthread_mutex_unlock(&bufmgr_gem->lock);
1341
1342	return 0;
1343}
1344
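/*
 * Helper shared by the GTT mapping paths: obtain the fake mmap offset and
 * map the buffer through the aperture.  Callers are responsible for any
 * set-domain call and for holding the bufmgr lock.
 */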
1345static int
1346map_gtt(drm_intel_bo *bo)
1347{
1348	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1349	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1350	int ret;
1351
1352	if (bo_gem->is_userptr)
1353		return -EINVAL;
1354
1355	if (bo_gem->map_count++ == 0)
1356		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1357
1358	/* Get a mapping of the buffer if we haven't before. */
1359	if (bo_gem->gtt_virtual == NULL) {
1360		struct drm_i915_gem_mmap_gtt mmap_arg;
1361
1362		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1363		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1364
1365		VG_CLEAR(mmap_arg);
1366		mmap_arg.handle = bo_gem->gem_handle;
1367
1368		/* Get the fake offset back... */
1369		ret = drmIoctl(bufmgr_gem->fd,
1370			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1371			       &mmap_arg);
1372		if (ret != 0) {
1373			ret = -errno;
1374			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1375			    __FILE__, __LINE__,
1376			    bo_gem->gem_handle, bo_gem->name,
1377			    strerror(errno));
1378			if (--bo_gem->map_count == 0)
1379				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1380			return ret;
1381		}
1382
1383		/* and mmap it */
1384		bo_gem->gtt_virtual = drm_mmap(0, bo->size, PROT_READ | PROT_WRITE,
1385					       MAP_SHARED, bufmgr_gem->fd,
1386					       mmap_arg.offset);
1387		if (bo_gem->gtt_virtual == MAP_FAILED) {
1388			bo_gem->gtt_virtual = NULL;
1389			ret = -errno;
1390			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1391			    __FILE__, __LINE__,
1392			    bo_gem->gem_handle, bo_gem->name,
1393			    strerror(errno));
1394			if (--bo_gem->map_count == 0)
1395				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1396			return ret;
1397		}
1398	}
1399
1400	bo->virtual = bo_gem->gtt_virtual;
1401
1402	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1403	    bo_gem->gtt_virtual);
1404
1405	return 0;
1406}
1407
1408drm_public int
1409drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1410{
1411	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1412	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1413	struct drm_i915_gem_set_domain set_domain;
1414	int ret;
1415
1416	pthread_mutex_lock(&bufmgr_gem->lock);
1417
1418	ret = map_gtt(bo);
1419	if (ret) {
1420		pthread_mutex_unlock(&bufmgr_gem->lock);
1421		return ret;
1422	}
1423
1424	/* Now move it to the GTT domain so that the GPU and CPU
1425	 * caches are flushed and the GPU isn't actively using the
1426	 * buffer.
1427	 *
1428	 * The pagefault handler does this domain change for us when
1429	 * it has unbound the BO from the GTT, but it's up to us to
1430	 * tell it when we're about to use things if we had done
1431	 * rendering and it still happens to be bound to the GTT.
1432	 */
1433	VG_CLEAR(set_domain);
1434	set_domain.handle = bo_gem->gem_handle;
1435	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1436	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1437	ret = drmIoctl(bufmgr_gem->fd,
1438		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1439		       &set_domain);
1440	if (ret != 0) {
1441		DBG("%s:%d: Error setting domain %d: %s\n",
1442		    __FILE__, __LINE__, bo_gem->gem_handle,
1443		    strerror(errno));
1444	}
1445
1446	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1447	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1448	pthread_mutex_unlock(&bufmgr_gem->lock);
1449
1450	return 0;
1451}
1452
1453/**
1454 * Performs a mapping of the buffer object like the normal GTT
1455 * mapping, but avoids waiting for the GPU to be done reading from or
1456 * rendering to the buffer.
1457 *
1458 * This is used in the implementation of GL_ARB_map_buffer_range: The
1459 * user asks to create a buffer, then does a mapping, fills some
1460 * space, runs a drawing command, then asks to map it again without
1461 * synchronizing because it guarantees that it won't write over the
1462 * data that the GPU is busy using (or, more specifically, that if it
1463 * does write over the data, it acknowledges that rendering is
1464 * undefined).
1465 */
1466
1467drm_public int
1468drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1469{
1470	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1471#ifdef HAVE_VALGRIND
1472	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1473#endif
1474	int ret;
1475
1476	/* If the CPU cache isn't coherent with the GTT, then use a
1477	 * regular synchronized mapping.  The problem is that we don't
1478	 * track where the buffer was last used on the CPU side in
1479	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1480	 * we would potentially corrupt the buffer even when the user
1481	 * does reasonable things.
1482	 */
1483	if (!bufmgr_gem->has_llc)
1484		return drm_intel_gem_bo_map_gtt(bo);
1485
1486	pthread_mutex_lock(&bufmgr_gem->lock);
1487
1488	ret = map_gtt(bo);
1489	if (ret == 0) {
1490		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1491		VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1492	}
1493
1494	pthread_mutex_unlock(&bufmgr_gem->lock);
1495
1496	return ret;
1497}
1498
1499static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1500{
1501	drm_intel_bufmgr_gem *bufmgr_gem;
1502	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1503	int ret = 0;
1504
1505	if (bo == NULL)
1506		return 0;
1507
1508	if (bo_gem->is_userptr)
1509		return 0;
1510
1511	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1512
1513	pthread_mutex_lock(&bufmgr_gem->lock);
1514
1515	if (bo_gem->map_count <= 0) {
1516		DBG("attempted to unmap an unmapped bo\n");
1517		pthread_mutex_unlock(&bufmgr_gem->lock);
1518		/* Preserve the old behaviour of just treating this as a
1519		 * no-op rather than reporting the error.
1520		 */
1521		return 0;
1522	}
1523
1524	if (bo_gem->mapped_cpu_write) {
1525		struct drm_i915_gem_sw_finish sw_finish;
1526
1527		/* Cause a flush to happen if the buffer's pinned for
1528		 * scanout, so the results show up in a timely manner.
1529		 * Unlike GTT set domains, this only does work if the
1530		 * buffer should be scanout-related.
1531		 */
1532		VG_CLEAR(sw_finish);
1533		sw_finish.handle = bo_gem->gem_handle;
1534		ret = drmIoctl(bufmgr_gem->fd,
1535			       DRM_IOCTL_I915_GEM_SW_FINISH,
1536			       &sw_finish);
1537		ret = ret == -1 ? -errno : 0;
1538
1539		bo_gem->mapped_cpu_write = false;
1540	}
1541
	/* We need to unmap after every invocation as we cannot track
	 * an open vma for every bo, as that would exhaust the system
	 * limits and cause later failures.
	 */
1546	if (--bo_gem->map_count == 0) {
1547		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1548		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1549		bo->virtual = NULL;
1550	}
1551	pthread_mutex_unlock(&bufmgr_gem->lock);
1552
1553	return ret;
1554}
1555
1556drm_public int
1557drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1558{
1559	return drm_intel_gem_bo_unmap(bo);
1560}
1561
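/*
 * Copy `size` bytes from `data` into the BO at `offset` using the pwrite
 * ioctl, without requiring a mapping.  Not supported for userptr objects.
 */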
1562static int
1563drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1564			 unsigned long size, const void *data)
1565{
1566	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1567	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1568	struct drm_i915_gem_pwrite pwrite;
1569	int ret;
1570
1571	if (bo_gem->is_userptr)
1572		return -EINVAL;
1573
1574	VG_CLEAR(pwrite);
1575	pwrite.handle = bo_gem->gem_handle;
1576	pwrite.offset = offset;
1577	pwrite.size = size;
1578	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1579	ret = drmIoctl(bufmgr_gem->fd,
1580		       DRM_IOCTL_I915_GEM_PWRITE,
1581		       &pwrite);
1582	if (ret != 0) {
1583		ret = -errno;
1584		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1585		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1586		    (int)size, strerror(errno));
1587	}
1588
1589	return ret;
1590}
1591
1592static int
1593drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1594{
1595	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1596	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1597	int ret;
1598
1599	VG_CLEAR(get_pipe_from_crtc_id);
1600	get_pipe_from_crtc_id.crtc_id = crtc_id;
1601	ret = drmIoctl(bufmgr_gem->fd,
1602		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1603		       &get_pipe_from_crtc_id);
1604	if (ret != 0) {
1605		/* We return -1 here to signal that we don't
1606		 * know which pipe is associated with this crtc.
1607		 * This lets the caller know that this information
1608		 * isn't available; using the wrong pipe for
		 * vblank waiting can cause the chipset to lock up.
		 */
1611		return -1;
1612	}
1613
1614	return get_pipe_from_crtc_id.pipe;
1615}
1616
1617static int
1618drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1619			     unsigned long size, void *data)
1620{
1621	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1622	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1623	struct drm_i915_gem_pread pread;
1624	int ret;
1625
1626	if (bo_gem->is_userptr)
1627		return -EINVAL;
1628
1629	VG_CLEAR(pread);
1630	pread.handle = bo_gem->gem_handle;
1631	pread.offset = offset;
1632	pread.size = size;
1633	pread.data_ptr = (uint64_t) (uintptr_t) data;
1634	ret = drmIoctl(bufmgr_gem->fd,
1635		       DRM_IOCTL_I915_GEM_PREAD,
1636		       &pread);
1637	if (ret != 0) {
1638		ret = -errno;
1639		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1640		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1641		    (int)size, strerror(errno));
1642	}
1643
1644	return ret;
1645}
1646
1647/** Waits for all GPU rendering with the object to have completed. */
1648static void
1649drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1650{
1651	drm_intel_gem_bo_start_gtt_access(bo, 1);
1652}
1653
1654/**
1655 * Waits on a BO for the given amount of time.
1656 *
1657 * @bo: buffer object to wait for
1658 * @timeout_ns: amount of time to wait in nanoseconds.
1659 *   If value is less than 0, an infinite wait will occur.
1660 *
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1662 * object has completed within the allotted time. Otherwise some negative return
1663 * value describes the error. Of particular interest is -ETIME when the wait has
1664 * failed to yield the desired result.
1665 *
1666 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
1667 * the operation to give up after a certain amount of time. Another subtle
1668 * difference is the internal locking semantics are different (this variant does
1669 * not hold the lock for the duration of the wait). This makes the wait subject
1670 * to a larger userspace race window.
1671 *
 * The implementation shall wait until the object is no longer actively
 * referenced within a batch buffer at the time of the call. The wait does
 * not guarantee that the buffer is not re-issued via another thread or a
 * flinked handle. Userspace must make sure this race does not occur if such
 * precision is important.
1677 */
1678drm_public int
1679drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1680{
1681	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1682	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1683	struct drm_i915_gem_wait wait;
1684	int ret;
1685
1686	if (!bufmgr_gem->has_wait_timeout) {
1687		DBG("%s:%d: Timed wait is not supported. Falling back to "
1688		    "infinite wait\n", __FILE__, __LINE__);
1689		if (timeout_ns) {
1690			drm_intel_gem_bo_wait_rendering(bo);
1691			return 0;
1692		} else {
1693			return drm_intel_gem_bo_busy(bo) ? -ETIME : 0;
1694		}
1695	}
1696
1697	wait.bo_handle = bo_gem->gem_handle;
1698	wait.timeout_ns = timeout_ns;
1699	wait.flags = 0;
1700	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1701	if (ret == -1)
1702		return -errno;
1703
1704	return ret;
1705}
1706
1707/**
1708 * Sets the object to the GTT read and possibly write domain, used by the X
1709 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1710 *
1711 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1712 * can do tiled pixmaps this way.
1713 */
1714drm_public void
1715drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1716{
1717	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1718	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1719	struct drm_i915_gem_set_domain set_domain;
1720	int ret;
1721
1722	VG_CLEAR(set_domain);
1723	set_domain.handle = bo_gem->gem_handle;
1724	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1725	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1726	ret = drmIoctl(bufmgr_gem->fd,
1727		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1728		       &set_domain);
1729	if (ret != 0) {
1730		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1731		    __FILE__, __LINE__, bo_gem->gem_handle,
1732		    set_domain.read_domains, set_domain.write_domain,
1733		    strerror(errno));
1734	}
1735}
1736
1737static void
1738drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1739{
1740	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1741	int i;
1742
1743	free(bufmgr_gem->exec2_objects);
1744	free(bufmgr_gem->exec_objects);
1745	free(bufmgr_gem->exec_bos);
1746	free(bufmgr_gem->aub_filename);
1747
1748	pthread_mutex_destroy(&bufmgr_gem->lock);
1749
1750	/* Free any cached buffer objects we were going to reuse */
1751	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1752		struct drm_intel_gem_bo_bucket *bucket =
1753		    &bufmgr_gem->cache_bucket[i];
1754		drm_intel_bo_gem *bo_gem;
1755
1756		while (!DRMLISTEMPTY(&bucket->head)) {
1757			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1758					      bucket->head.next, head);
1759			DRMLISTDEL(&bo_gem->head);
1760
1761			drm_intel_gem_bo_free(&bo_gem->bo);
1762		}
1763	}
1764
1765	free(bufmgr);
1766}
1767
1768/**
1769 * Adds the target buffer to the validation list and adds the relocation
1770 * to the reloc_buffer's relocation list.
1771 *
1772 * The relocation entry at the given offset must already contain the
1773 * precomputed relocation value, because the kernel will optimize out
1774 * the relocation entry write when the buffer hasn't moved from the
1775 * last known offset in target_bo.
1776 */
1777static int
1778do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1779		 drm_intel_bo *target_bo, uint32_t target_offset,
1780		 uint32_t read_domains, uint32_t write_domain,
1781		 bool need_fence)
1782{
1783	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1784	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1785	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1786	bool fenced_command;
1787
1788	if (bo_gem->has_error)
1789		return -ENOMEM;
1790
1791	if (target_bo_gem->has_error) {
1792		bo_gem->has_error = true;
1793		return -ENOMEM;
1794	}
1795
1796	/* We never use HW fences for rendering on 965+ */
1797	if (bufmgr_gem->gen >= 4)
1798		need_fence = false;
1799
1800	fenced_command = need_fence;
1801	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1802		need_fence = false;
1803
1804	/* Create a new relocation list if needed */
1805	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1806		return -ENOMEM;
1807
1808	/* Check overflow */
1809	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1810
1811	/* Check args */
1812	assert(offset <= bo->size - 4);
1813	assert((write_domain & (write_domain - 1)) == 0);
1814
1815	/* An object needing a fence is a tiled buffer, so it won't have
1816	 * relocs to other buffers.
1817	 */
1818	if (need_fence) {
1819		assert(target_bo_gem->reloc_count == 0);
1820		target_bo_gem->reloc_tree_fences = 1;
1821	}
1822
1823	/* Make sure that we're not adding a reloc to something whose size has
1824	 * already been accounted for.
1825	 */
1826	assert(!bo_gem->used_as_reloc_target);
1827	if (target_bo_gem != bo_gem) {
1828		target_bo_gem->used_as_reloc_target = true;
1829		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1830		bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
1831	}
1832
1833	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1834	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1835	bo_gem->relocs[bo_gem->reloc_count].target_handle =
1836	    target_bo_gem->gem_handle;
1837	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1838	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1839	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
1840
1841	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1842	if (target_bo != bo)
1843		drm_intel_gem_bo_reference(target_bo);
1844	if (fenced_command)
1845		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
1846			DRM_INTEL_RELOC_FENCE;
1847	else
1848		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
1849
1850	bo_gem->reloc_count++;
1851
1852	return 0;
1853}
1854
1855static int
1856drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1857			    drm_intel_bo *target_bo, uint32_t target_offset,
1858			    uint32_t read_domains, uint32_t write_domain)
1859{
1860	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1861
1862	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1863				read_domains, write_domain,
1864				!bufmgr_gem->fenced_relocs);
1865}
1866
1867static int
1868drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
1869				  drm_intel_bo *target_bo,
1870				  uint32_t target_offset,
1871				  uint32_t read_domains, uint32_t write_domain)
1872{
1873	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1874				read_domains, write_domain, true);
1875}
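
/*
 * Example (illustrative sketch, not part of the library): recording a
 * relocation through the public entry point.  The dword at "offset" in
 * the batch must already contain the presumed address of "target", as
 * the comment above do_bo_emit_reloc() explains.
 *
 *	static int
 *	emit_pointer(drm_intel_bo *batch, uint32_t offset, drm_intel_bo *target)
 *	{
 *		return drm_intel_bo_emit_reloc(batch, offset, target, 0,
 *					       I915_GEM_DOMAIN_RENDER, 0);
 *	}
 */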
1876
1877drm_public int
1878drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
1879{
1880	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1881
1882	return bo_gem->reloc_count;
1883}
1884
1885/**
1886 * Removes existing relocation entries in the BO after "start".
1887 *
1888 * This allows a user to avoid a two-step process for state setup with
1889 * counting up all the buffer objects and doing a
1890 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
1891 * relocations for the state setup.  Instead, save the state of the
1892 * batchbuffer, including drm_intel_gem_bo_get_reloc_count(), emit all the
1893 * state, and then check if it still fits in the aperture.
1894 *
1895 * Any further drm_intel_bufmgr_check_aperture_space() queries
1896 * involving this buffer in the tree are undefined after this call.
1897 */
1898drm_public void
1899drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
1900{
1901	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1902	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1903	int i;
1904	struct timespec time;
1905
1906	clock_gettime(CLOCK_MONOTONIC, &time);
1907
1908	assert(bo_gem->reloc_count >= start);
1909
1910	/* Unreference the cleared target buffers */
1911	pthread_mutex_lock(&bufmgr_gem->lock);
1912
1913	for (i = start; i < bo_gem->reloc_count; i++) {
1914		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
1915		if (&target_bo_gem->bo != bo) {
1916			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
1917			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
1918								  time.tv_sec);
1919		}
1920	}
1921	bo_gem->reloc_count = start;
1922
1923	pthread_mutex_unlock(&bufmgr_gem->lock);
1924
1925}
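
/*
 * Example (illustrative sketch): the single-pass pattern described in the
 * comment above.  emit_state() and flush_batch() are hypothetical caller
 * helpers, not part of this library.
 *
 *	int saved = drm_intel_gem_bo_get_reloc_count(batch);
 *	emit_state(batch);
 *	if (drm_intel_bufmgr_check_aperture_space(&batch, 1) != 0) {
 *		// Didn't fit: drop the new relocations and flush first.
 *		drm_intel_gem_bo_clear_relocs(batch, saved);
 *		flush_batch();
 *	}
 */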
1926
1927/**
1928 * Walk the tree of relocations rooted at BO and accumulate the list of
1929 * validations to be performed and update the relocation buffers with
1930 * index values into the validation list.
1931 */
1932static void
1933drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
1934{
1935	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1936	int i;
1937
1938	if (bo_gem->relocs == NULL)
1939		return;
1940
1941	for (i = 0; i < bo_gem->reloc_count; i++) {
1942		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1943
1944		if (target_bo == bo)
1945			continue;
1946
1947		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1948
1949		/* Continue walking the tree depth-first. */
1950		drm_intel_gem_bo_process_reloc(target_bo);
1951
1952		/* Add the target to the validate list */
1953		drm_intel_add_validate_buffer(target_bo);
1954	}
1955}
1956
1957static void
1958drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
1959{
1960	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1961	int i;
1962
1963	if (bo_gem->relocs == NULL)
1964		return;
1965
1966	for (i = 0; i < bo_gem->reloc_count; i++) {
1967		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1968		int need_fence;
1969
1970		if (target_bo == bo)
1971			continue;
1972
1973		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1974
1975		/* Continue walking the tree depth-first. */
1976		drm_intel_gem_bo_process_reloc2(target_bo);
1977
1978		need_fence = (bo_gem->reloc_target_info[i].flags &
1979			      DRM_INTEL_RELOC_FENCE);
1980
1981		/* Add the target to the validate list */
1982		drm_intel_add_validate_buffer2(target_bo, need_fence);
1983	}
1984}
1985
1986
1987static void
1988drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
1989{
1990	int i;
1991
1992	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1993		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1994		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1995
1996		/* Update the buffer offset */
1997		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
1998			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1999			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
2000			    (unsigned long long)bufmgr_gem->exec_objects[i].
2001			    offset);
2002			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
2003			bo->offset = bufmgr_gem->exec_objects[i].offset;
2004		}
2005	}
2006}
2007
2008static void
2009drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
2010{
2011	int i;
2012
2013	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2014		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2015		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2016
2017		/* Update the buffer offset */
2018		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
2019			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
2020			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
2021			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
2022			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
2023			bo->offset = bufmgr_gem->exec2_objects[i].offset;
2024		}
2025	}
2026}
2027
2028static void
2029aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
2030{
2031	fwrite(&data, 1, 4, bufmgr_gem->aub_file);
2032}
2033
2034static void
2035aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
2036{
2037	fwrite(data, 1, size, bufmgr_gem->aub_file);
2038}
2039
2040static void
2041aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
2042{
2043	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2044	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2045	uint32_t *data;
2046	unsigned int i;
2047
2048	data = malloc(bo->size);
2049	drm_intel_bo_get_subdata(bo, offset, size, data);
2050
2051	/* Easy mode: write out bo with no relocations */
2052	if (!bo_gem->reloc_count) {
2053		aub_out_data(bufmgr_gem, data, size);
2054		free(data);
2055		return;
2056	}
2057
2058	/* Otherwise, handle the relocations while writing. */
2059	for (i = 0; i < size / 4; i++) {
2060		int r;
2061		for (r = 0; r < bo_gem->reloc_count; r++) {
2062			struct drm_i915_gem_relocation_entry *reloc;
2063			drm_intel_reloc_target *info;
2064
2065			reloc = &bo_gem->relocs[r];
2066			info = &bo_gem->reloc_target_info[r];
2067
2068			if (reloc->offset == offset + i * 4) {
2069				drm_intel_bo_gem *target_gem;
2070				uint32_t val;
2071
2072				target_gem = (drm_intel_bo_gem *)info->bo;
2073
2074				val = reloc->delta;
2075				val += target_gem->aub_offset;
2076
2077				aub_out(bufmgr_gem, val);
2078				data[i] = val;
2079				break;
2080			}
2081		}
2082		if (r == bo_gem->reloc_count) {
2083			/* no relocation, just the data */
2084			aub_out(bufmgr_gem, data[i]);
2085		}
2086	}
2087
2088	free(data);
2089}
2090
2091static void
2092aub_bo_get_address(drm_intel_bo *bo)
2093{
2094	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2095	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2096
2097	/* Give the object a graphics address in the AUB file.  We
2098	 * don't just use the GEM object address because we do AUB
2099	 * dumping before execution -- we want to successfully log
2100	 * when the hardware might hang, and we might even want to aub
2101	 * capture for a driver trying to execute on a different
2102	 * generation of hardware by disabling the actual kernel exec
2103	 * call.
2104	 */
2105	bo_gem->aub_offset = bufmgr_gem->aub_offset;
2106	bufmgr_gem->aub_offset += bo->size;
2107	/* XXX: Handle aperture overflow. */
2108	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
2109}
2110
2111static void
2112aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
2113		      uint32_t offset, uint32_t size)
2114{
2115	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2116	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2117
2118	aub_out(bufmgr_gem,
2119		CMD_AUB_TRACE_HEADER_BLOCK |
2120		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
2121	aub_out(bufmgr_gem,
2122		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
2123	aub_out(bufmgr_gem, subtype);
2124	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
2125	aub_out(bufmgr_gem, size);
2126	if (bufmgr_gem->gen >= 8)
2127		aub_out(bufmgr_gem, 0);
2128	aub_write_bo_data(bo, offset, size);
2129}
2130
2131/**
2132 * Break up large objects into multiple writes.  Otherwise a 128kb VBO
2133 * would overflow the 16-bit size field in the packet header and
2134 * everything goes badly after that.
2135 */
2136static void
2137aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
2138			    uint32_t offset, uint32_t size)
2139{
2140	uint32_t block_size;
2141	uint32_t sub_offset;
2142
2143	for (sub_offset = 0; sub_offset < size; sub_offset += block_size) {
2144		block_size = size - sub_offset;
2145
2146		if (block_size > 8 * 4096)
2147			block_size = 8 * 4096;
2148
2149		aub_write_trace_block(bo, type, subtype, offset + sub_offset,
2150				      block_size);
2151	}
2152}
2153
2154static void
2155aub_write_bo(drm_intel_bo *bo)
2156{
2157	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2158	uint32_t offset = 0;
2159	unsigned i;
2160
2161	aub_bo_get_address(bo);
2162
2163	/* Write out each annotated section separately. */
2164	for (i = 0; i < bo_gem->aub_annotation_count; ++i) {
2165		drm_intel_aub_annotation *annotation =
2166			&bo_gem->aub_annotations[i];
2167		uint32_t ending_offset = annotation->ending_offset;
2168		if (ending_offset > bo->size)
2169			ending_offset = bo->size;
2170		if (ending_offset > offset) {
2171			aub_write_large_trace_block(bo, annotation->type,
2172						    annotation->subtype,
2173						    offset,
2174						    ending_offset - offset);
2175			offset = ending_offset;
2176		}
2177	}
2178
2179	/* Write out any remaining unannotated data */
2180	if (offset < bo->size) {
2181		aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
2182					    offset, bo->size - offset);
2183	}
2184}
2185
2186/*
2187 * Make a ring buffer on the fly and dump it.
2188 */
2189static void
2190aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
2191			  uint32_t batch_buffer, int ring_flag)
2192{
2193	uint32_t ringbuffer[4096];
2194	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
2195	int ring_count = 0;
2196
2197	if (ring_flag == I915_EXEC_BSD)
2198		ring = AUB_TRACE_TYPE_RING_PRB1;
2199	else if (ring_flag == I915_EXEC_BLT)
2200		ring = AUB_TRACE_TYPE_RING_PRB2;
2201
2202	/* Make a ring buffer to execute our batchbuffer. */
2203	memset(ringbuffer, 0, sizeof(ringbuffer));
2204	if (bufmgr_gem->gen >= 8) {
2205		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
2206		ringbuffer[ring_count++] = batch_buffer;
2207		ringbuffer[ring_count++] = 0;
2208	} else {
2209		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
2210		ringbuffer[ring_count++] = batch_buffer;
2211	}
2212
2213	/* Write out the ring.  This appears to trigger execution of
2214	 * the ring in the simulator.
2215	 */
2216	aub_out(bufmgr_gem,
2217		CMD_AUB_TRACE_HEADER_BLOCK |
2218		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
2219	aub_out(bufmgr_gem,
2220		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
2221	aub_out(bufmgr_gem, 0); /* general/surface subtype */
2222	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
2223	aub_out(bufmgr_gem, ring_count * 4);
2224	if (bufmgr_gem->gen >= 8)
2225		aub_out(bufmgr_gem, 0);
2226
2227	/* FIXME: Need some flush operations here? */
2228	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
2229
2230	/* Update offset pointer */
2231	bufmgr_gem->aub_offset += 4096;
2232}
2233
2234drm_public void
2235drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2236			      int x1, int y1, int width, int height,
2237			      enum aub_dump_bmp_format format,
2238			      int pitch, int offset)
2239{
2240	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2241	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2242	uint32_t cpp;
2243
2244	switch (format) {
2245	case AUB_DUMP_BMP_FORMAT_8BIT:
2246		cpp = 1;
2247		break;
2248	case AUB_DUMP_BMP_FORMAT_ARGB_4444:
2249		cpp = 2;
2250		break;
2251	case AUB_DUMP_BMP_FORMAT_ARGB_0888:
2252	case AUB_DUMP_BMP_FORMAT_ARGB_8888:
2253		cpp = 4;
2254		break;
2255	default:
2256		printf("Unknown AUB dump format %d\n", format);
2257		return;
2258	}
2259
2260	if (!bufmgr_gem->aub_file)
2261		return;
2262
2263	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
2264	aub_out(bufmgr_gem, (y1 << 16) | x1);
2265	aub_out(bufmgr_gem,
2266		(format << 24) |
2267		(cpp << 19) |
2268		pitch / 4);
2269	aub_out(bufmgr_gem, (height << 16) | width);
2270	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
2271	aub_out(bufmgr_gem,
2272		((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) |
2273		((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0));
2274}
2275
2276static void
2277aub_exec(drm_intel_bo *bo, int ring_flag, int used)
2278{
2279	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2280	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2281	int i;
2282	bool batch_buffer_needs_annotations;
2283
2284	if (!bufmgr_gem->aub_file)
2285		return;
2286
2287	/* If batch buffer is not annotated, annotate it the best we
2288	 * can.
2289	 */
2290	batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0;
2291	if (batch_buffer_needs_annotations) {
2292		drm_intel_aub_annotation annotations[2] = {
2293			{ AUB_TRACE_TYPE_BATCH, 0, used },
2294			{ AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
2295		};
2296		drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2);
2297	}
2298
2299	/* Write out all buffers to AUB memory */
2300	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2301		aub_write_bo(bufmgr_gem->exec_bos[i]);
2302	}
2303
2304	/* Remove any annotations we added */
2305	if (batch_buffer_needs_annotations)
2306		drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
2307
2308	/* Dump ring buffer */
2309	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
2310
2311	fflush(bufmgr_gem->aub_file);
2312
2313	/*
2314	 * One frame has been dumped. So reset the aub_offset for the next frame.
2315	 *
2316	 * FIXME: Can we do this?
2317	 */
2318	bufmgr_gem->aub_offset = 0x10000;
2319}
2320
2321static int
2322drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
2323		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
2324{
2325	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2326	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2327	struct drm_i915_gem_execbuffer execbuf;
2328	int ret, i;
2329
2330	if (bo_gem->has_error)
2331		return -ENOMEM;
2332
2333	pthread_mutex_lock(&bufmgr_gem->lock);
2334	/* Update indices and set up the validate list. */
2335	drm_intel_gem_bo_process_reloc(bo);
2336
2337	/* Add the batch buffer to the validation list.  There are no
2338	 * relocations pointing to it.
2339	 */
2340	drm_intel_add_validate_buffer(bo);
2341
2342	VG_CLEAR(execbuf);
2343	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
2344	execbuf.buffer_count = bufmgr_gem->exec_count;
2345	execbuf.batch_start_offset = 0;
2346	execbuf.batch_len = used;
2347	execbuf.cliprects_ptr = (uintptr_t) cliprects;
2348	execbuf.num_cliprects = num_cliprects;
2349	execbuf.DR1 = 0;
2350	execbuf.DR4 = DR4;
2351
2352	ret = drmIoctl(bufmgr_gem->fd,
2353		       DRM_IOCTL_I915_GEM_EXECBUFFER,
2354		       &execbuf);
2355	if (ret != 0) {
2356		ret = -errno;
2357		if (errno == ENOSPC) {
2358			DBG("Execbuffer fails to pin. "
2359			    "Estimate: %u. Actual: %u. Available: %u\n",
2360			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2361							       bufmgr_gem->
2362							       exec_count),
2363			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2364							      bufmgr_gem->
2365							      exec_count),
2366			    (unsigned int)bufmgr_gem->gtt_size);
2367		}
2368	}
2369	drm_intel_update_buffer_offsets(bufmgr_gem);
2370
2371	if (bufmgr_gem->bufmgr.debug)
2372		drm_intel_gem_dump_validation_list(bufmgr_gem);
2373
2374	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2375		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2376		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2377
2378		bo_gem->idle = false;
2379
2380		/* Disconnect the buffer from the validate list */
2381		bo_gem->validate_index = -1;
2382		bufmgr_gem->exec_bos[i] = NULL;
2383	}
2384	bufmgr_gem->exec_count = 0;
2385	pthread_mutex_unlock(&bufmgr_gem->lock);
2386
2387	return ret;
2388}
2389
2390static int
2391do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2392	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2393	 unsigned int flags)
2394{
2395	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2396	struct drm_i915_gem_execbuffer2 execbuf;
2397	int ret = 0;
2398	int i;
2399
2400	switch (flags & 0x7) {
2401	default:
2402		return -EINVAL;
2403	case I915_EXEC_BLT:
2404		if (!bufmgr_gem->has_blt)
2405			return -EINVAL;
2406		break;
2407	case I915_EXEC_BSD:
2408		if (!bufmgr_gem->has_bsd)
2409			return -EINVAL;
2410		break;
2411	case I915_EXEC_VEBOX:
2412		if (!bufmgr_gem->has_vebox)
2413			return -EINVAL;
2414		break;
2415	case I915_EXEC_RENDER:
2416	case I915_EXEC_DEFAULT:
2417		break;
2418	}
2419
2420	pthread_mutex_lock(&bufmgr_gem->lock);
2421	/* Update indices and set up the validate list. */
2422	drm_intel_gem_bo_process_reloc2(bo);
2423
2424	/* Add the batch buffer to the validation list.  There are no relocations
2425	 * pointing to it.
2426	 */
2427	drm_intel_add_validate_buffer2(bo, 0);
2428
2429	VG_CLEAR(execbuf);
2430	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2431	execbuf.buffer_count = bufmgr_gem->exec_count;
2432	execbuf.batch_start_offset = 0;
2433	execbuf.batch_len = used;
2434	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2435	execbuf.num_cliprects = num_cliprects;
2436	execbuf.DR1 = 0;
2437	execbuf.DR4 = DR4;
2438	execbuf.flags = flags;
2439	if (ctx == NULL)
2440		i915_execbuffer2_set_context_id(execbuf, 0);
2441	else
2442		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2443	execbuf.rsvd2 = 0;
2444
2445	aub_exec(bo, flags, used);
2446
2447	if (bufmgr_gem->no_exec)
2448		goto skip_execution;
2449
2450	ret = drmIoctl(bufmgr_gem->fd,
2451		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
2452		       &execbuf);
2453	if (ret != 0) {
2454		ret = -errno;
2455		if (ret == -ENOSPC) {
2456			DBG("Execbuffer fails to pin. "
2457			    "Estimate: %u. Actual: %u. Available: %u\n",
2458			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2459							       bufmgr_gem->exec_count),
2460			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2461							      bufmgr_gem->exec_count),
2462			    (unsigned int) bufmgr_gem->gtt_size);
2463		}
2464	}
2465	drm_intel_update_buffer_offsets2(bufmgr_gem);
2466
2467skip_execution:
2468	if (bufmgr_gem->bufmgr.debug)
2469		drm_intel_gem_dump_validation_list(bufmgr_gem);
2470
2471	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2472		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2473		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2474
2475		bo_gem->idle = false;
2476
2477		/* Disconnect the buffer from the validate list */
2478		bo_gem->validate_index = -1;
2479		bufmgr_gem->exec_bos[i] = NULL;
2480	}
2481	bufmgr_gem->exec_count = 0;
2482	pthread_mutex_unlock(&bufmgr_gem->lock);
2483
2484	return ret;
2485}
2486
2487static int
2488drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2489		       drm_clip_rect_t *cliprects, int num_cliprects,
2490		       int DR4)
2491{
2492	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2493			I915_EXEC_RENDER);
2494}
2495
2496static int
2497drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2498			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2499			unsigned int flags)
2500{
2501	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2502			flags);
2503}
2504
2505drm_public int
2506drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2507			      int used, unsigned int flags)
2508{
2509	return do_exec2(bo, used, ctx, NULL, 0, 0, flags);
2510}
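
/*
 * Example (illustrative sketch): executing a batch inside a hardware
 * context so per-context state survives between batches.  "batch" and
 * "used_bytes" are assumed to come from the caller's batchbuffer code.
 *
 *	drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
 *	if (ctx) {
 *		drm_intel_gem_bo_context_exec(batch, ctx, used_bytes,
 *					      I915_EXEC_RENDER);
 *		drm_intel_gem_context_destroy(ctx);
 *	}
 */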
2511
2512static int
2513drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2514{
2515	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2516	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2517	struct drm_i915_gem_pin pin;
2518	int ret;
2519
2520	VG_CLEAR(pin);
2521	pin.handle = bo_gem->gem_handle;
2522	pin.alignment = alignment;
2523
2524	ret = drmIoctl(bufmgr_gem->fd,
2525		       DRM_IOCTL_I915_GEM_PIN,
2526		       &pin);
2527	if (ret != 0)
2528		return -errno;
2529
2530	bo->offset64 = pin.offset;
2531	bo->offset = pin.offset;
2532	return 0;
2533}
2534
2535static int
2536drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2537{
2538	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2539	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2540	struct drm_i915_gem_unpin unpin;
2541	int ret;
2542
2543	VG_CLEAR(unpin);
2544	unpin.handle = bo_gem->gem_handle;
2545
2546	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2547	if (ret != 0)
2548		return -errno;
2549
2550	return 0;
2551}
2552
2553static int
2554drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2555				     uint32_t tiling_mode,
2556				     uint32_t stride)
2557{
2558	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2559	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2560	struct drm_i915_gem_set_tiling set_tiling;
2561	int ret;
2562
2563	if (bo_gem->global_name == 0 &&
2564	    tiling_mode == bo_gem->tiling_mode &&
2565	    stride == bo_gem->stride)
2566		return 0;
2567
2568	memset(&set_tiling, 0, sizeof(set_tiling));
2569	do {
2570		/* set_tiling is slightly broken and overwrites the
2571		 * input on the error path, so we have to open code
2572		 * drmIoctl.
2573		 */
2574		set_tiling.handle = bo_gem->gem_handle;
2575		set_tiling.tiling_mode = tiling_mode;
2576		set_tiling.stride = stride;
2577
2578		ret = ioctl(bufmgr_gem->fd,
2579			    DRM_IOCTL_I915_GEM_SET_TILING,
2580			    &set_tiling);
2581	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2582	if (ret == -1)
2583		return -errno;
2584
2585	bo_gem->tiling_mode = set_tiling.tiling_mode;
2586	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2587	bo_gem->stride = set_tiling.stride;
2588	return 0;
2589}
2590
2591static int
2592drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2593			    uint32_t stride)
2594{
2595	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2596	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2597	int ret;
2598
2599	/* Tiling with userptr surfaces is not supported
2600	 * on all hardware, so refuse it for the time being.
2601	 */
2602	if (bo_gem->is_userptr)
2603		return -EINVAL;
2604
2605	/* Linear buffers have no stride. By ensuring that we only ever use
2606	 * stride 0 with linear buffers, we simplify our code.
2607	 */
2608	if (*tiling_mode == I915_TILING_NONE)
2609		stride = 0;
2610
2611	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2612	if (ret == 0)
2613		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
2614
2615	*tiling_mode = bo_gem->tiling_mode;
2616	return ret;
2617}
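
/*
 * Example (illustrative sketch): requesting X tiling on a buffer, where
 * "pitch" is the caller's stride in bytes.  The kernel may refuse or
 * downgrade the request, so the value written back into tiling_mode is
 * what actually took effect.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	if (drm_intel_bo_set_tiling(bo, &tiling, pitch) == 0 &&
 *	    tiling != I915_TILING_X) {
 *		// tiling was refused; treat the buffer as linear
 *	}
 */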
2618
2619static int
2620drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2621			    uint32_t * swizzle_mode)
2622{
2623	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2624
2625	*tiling_mode = bo_gem->tiling_mode;
2626	*swizzle_mode = bo_gem->swizzle_mode;
2627	return 0;
2628}
2629
2630drm_public drm_intel_bo *
2631drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2632{
2633	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2634	int ret;
2635	uint32_t handle;
2636	drm_intel_bo_gem *bo_gem;
2637	struct drm_i915_gem_get_tiling get_tiling;
2638	drmMMListHead *list;
2639
2640	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2641
2642	/*
2643	 * See if the kernel has already returned this buffer to us. Just as
2644	 * for named buffers, we must not create two bos pointing at the same
2645	 * kernel object.
2646	 */
2647	pthread_mutex_lock(&bufmgr_gem->lock);
2648	for (list = bufmgr_gem->named.next;
2649	     list != &bufmgr_gem->named;
2650	     list = list->next) {
2651		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
2652		if (bo_gem->gem_handle == handle) {
2653			drm_intel_gem_bo_reference(&bo_gem->bo);
2654			pthread_mutex_unlock(&bufmgr_gem->lock);
2655			return &bo_gem->bo;
2656		}
2657	}
2658
2659	if (ret) {
2660		fprintf(stderr, "drmPrimeFDToHandle failed: %d %d\n", ret, errno);
2661		pthread_mutex_unlock(&bufmgr_gem->lock);
2662		return NULL;
2663	}
2664
2665	bo_gem = calloc(1, sizeof(*bo_gem));
2666	if (!bo_gem) {
2667		pthread_mutex_unlock(&bufmgr_gem->lock);
2668		return NULL;
2669	}
2670	/* Determine size of bo.  The fd-to-handle ioctl really should
2671	 * return the size, but it doesn't.  If we have kernel 3.12 or
2672	 * later, we can lseek on the prime fd to get the size.  Older
2673	 * kernels will just fail, in which case we fall back to the
2674	 * size provided by the caller (an estimate or guess). */
2675	ret = lseek(prime_fd, 0, SEEK_END);
2676	if (ret != -1)
2677		bo_gem->bo.size = ret;
2678	else
2679		bo_gem->bo.size = size;
2680
2681	bo_gem->bo.handle = handle;
2682	bo_gem->bo.bufmgr = bufmgr;
2683
2684	bo_gem->gem_handle = handle;
2685
2686	atomic_set(&bo_gem->refcount, 1);
2687
2688	bo_gem->name = "prime";
2689	bo_gem->validate_index = -1;
2690	bo_gem->reloc_tree_fences = 0;
2691	bo_gem->used_as_reloc_target = false;
2692	bo_gem->has_error = false;
2693	bo_gem->reusable = false;
2694
2695	DRMINITLISTHEAD(&bo_gem->vma_list);
2696	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2697	pthread_mutex_unlock(&bufmgr_gem->lock);
2698
2699	VG_CLEAR(get_tiling);
2700	get_tiling.handle = bo_gem->gem_handle;
2701	ret = drmIoctl(bufmgr_gem->fd,
2702		       DRM_IOCTL_I915_GEM_GET_TILING,
2703		       &get_tiling);
2704	if (ret != 0) {
2705		drm_intel_gem_bo_unreference(&bo_gem->bo);
2706		return NULL;
2707	}
2708	bo_gem->tiling_mode = get_tiling.tiling_mode;
2709	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2710	/* XXX stride is unknown */
2711	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
2712
2713	return &bo_gem->bo;
2714}
2715
2716drm_public int
2717drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2718{
2719	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2720	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2721
2722	pthread_mutex_lock(&bufmgr_gem->lock);
2723	if (DRMLISTEMPTY(&bo_gem->name_list))
2724		DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2725	pthread_mutex_unlock(&bufmgr_gem->lock);
2726
2727	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2728			       DRM_CLOEXEC, prime_fd) != 0)
2729		return -errno;
2730
2731	bo_gem->reusable = false;
2732
2733	return 0;
2734}
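
/*
 * Example (illustrative sketch): sharing a buffer across processes (or
 * across APIs in one process) through a PRIME file descriptor.
 * "other_bufmgr" stands for the importing side's buffer manager.
 *
 *	int fd;
 *	if (drm_intel_bo_gem_export_to_prime(bo, &fd) == 0) {
 *		// hand "fd" to the importer, e.g. over a unix socket
 *		drm_intel_bo *imported =
 *			drm_intel_bo_gem_create_from_prime(other_bufmgr, fd,
 *							   bo->size);
 *	}
 */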
2735
2736static int
2737drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2738{
2739	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2740	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2741	int ret;
2742
2743	if (!bo_gem->global_name) {
2744		struct drm_gem_flink flink;
2745
2746		VG_CLEAR(flink);
2747		flink.handle = bo_gem->gem_handle;
2748
2749		pthread_mutex_lock(&bufmgr_gem->lock);
2750
2751		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
2752		if (ret != 0) {
2753			pthread_mutex_unlock(&bufmgr_gem->lock);
2754			return -errno;
2755		}
2756
2757		bo_gem->global_name = flink.name;
2758		bo_gem->reusable = false;
2759
2760		if (DRMLISTEMPTY(&bo_gem->name_list))
2761			DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2762		pthread_mutex_unlock(&bufmgr_gem->lock);
2763	}
2764
2765	*name = bo_gem->global_name;
2766	return 0;
2767}
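
/*
 * Example (illustrative sketch): exporting a global (flink) name so that
 * another process can open the same object with
 * drm_intel_bo_gem_create_from_name().
 *
 *	uint32_t name;
 *	if (drm_intel_bo_flink(bo, &name) == 0) {
 *		// publish "name"; the peer then calls
 *		// drm_intel_bo_gem_create_from_name(bufmgr, "shared", name)
 *	}
 */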
2768
2769/**
2770 * Enables unlimited caching of buffer objects for reuse.
2771 *
2772 * This is potentially very memory expensive, as the cache at each bucket
2773 * size is only bounded by how many buffers of that size we've managed to have
2774 * in flight at once.
2775 */
2776drm_public void
2777drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2778{
2779	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2780
2781	bufmgr_gem->bo_reuse = true;
2782}
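
/*
 * Example (illustrative sketch): typical driver start-up, enabling the
 * BO cache right after creating the buffer manager.
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	if (bufmgr)
 *		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 */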
2783
2784/**
2785 * Enable use of fenced reloc type.
2786 *
2787 * New code should enable this to avoid unnecessary fence register
2788 * allocation.  If this option is not enabled, all relocs will have a fence
2789 * register allocated.
2790 */
2791drm_public void
2792drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2793{
2794	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2795
2796	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
2797		bufmgr_gem->fenced_relocs = true;
2798}
2799
2800/**
2801 * Return the additional aperture space required by the tree of buffer objects
2802 * rooted at bo.
2803 */
2804static int
2805drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2806{
2807	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2808	int i;
2809	int total = 0;
2810
2811	if (bo == NULL || bo_gem->included_in_check_aperture)
2812		return 0;
2813
2814	total += bo->size;
2815	bo_gem->included_in_check_aperture = true;
2816
2817	for (i = 0; i < bo_gem->reloc_count; i++)
2818		total +=
2819		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2820							reloc_target_info[i].bo);
2821
2822	return total;
2823}
2824
2825/**
2826 * Count the number of buffers in this list that need a fence reg
2827 *
2828 * If the count is greater than the number of available regs, we'll have
2829 * to ask the caller to resubmit a batch with fewer tiled buffers.
2830 *
2831 * This function over-counts if the same buffer is used multiple times.
2832 */
2833static unsigned int
2834drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2835{
2836	int i;
2837	unsigned int total = 0;
2838
2839	for (i = 0; i < count; i++) {
2840		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2841
2842		if (bo_gem == NULL)
2843			continue;
2844
2845		total += bo_gem->reloc_tree_fences;
2846	}
2847	return total;
2848}
2849
2850/**
2851 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2852 * for the next drm_intel_bufmgr_check_aperture_space() call.
2853 */
2854static void
2855drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2856{
2857	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2858	int i;
2859
2860	if (bo == NULL || !bo_gem->included_in_check_aperture)
2861		return;
2862
2863	bo_gem->included_in_check_aperture = false;
2864
2865	for (i = 0; i < bo_gem->reloc_count; i++)
2866		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2867							   reloc_target_info[i].bo);
2868}
2869
2870/**
2871 * Return a conservative estimate for the amount of aperture required
2872 * for a collection of buffers. This may double-count some buffers.
2873 */
2874static unsigned int
2875drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2876{
2877	int i;
2878	unsigned int total = 0;
2879
2880	for (i = 0; i < count; i++) {
2881		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2882		if (bo_gem != NULL)
2883			total += bo_gem->reloc_tree_size;
2884	}
2885	return total;
2886}
2887
2888/**
2889 * Return the amount of aperture needed for a collection of buffers.
2890 * This avoids double counting any buffers, at the cost of looking
2891 * at every buffer in the set.
2892 */
2893static unsigned int
2894drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2895{
2896	int i;
2897	unsigned int total = 0;
2898
2899	for (i = 0; i < count; i++) {
2900		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2901		/* For the first buffer object in the array, we get an
2902		 * accurate count back for its reloc_tree size (since nothing
2903		 * had been flagged as being counted yet).  We can save that
2904		 * value out as a more conservative reloc_tree_size that
2905		 * avoids double-counting target buffers.  Since the first
2906		 * buffer happens to usually be the batch buffer in our
2907		 * callers, this can pull us back from doing the tree
2908		 * walk on every new batch emit.
2909		 */
2910		if (i == 0) {
2911			drm_intel_bo_gem *bo_gem =
2912			    (drm_intel_bo_gem *) bo_array[i];
2913			bo_gem->reloc_tree_size = total;
2914		}
2915	}
2916
2917	for (i = 0; i < count; i++)
2918		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2919	return total;
2920}
2921
2922/**
2923 * Return -1 if the batchbuffer should be flushed before attempting to
2924 * emit rendering referencing the buffers pointed to by bo_array.
2925 *
2926 * This is required because if we try to emit a batchbuffer with relocations
2927 * to a tree of buffers that won't simultaneously fit in the aperture,
2928 * the rendering will return an error at a point where the software is not
2929 * prepared to recover from it.
2930 *
2931 * However, we also want to emit the batchbuffer significantly before we reach
2932 * the limit, as a series of batchbuffers each of which references buffers
2933 * covering almost all of the aperture means that at each emit we end up
2934 * waiting to evict a buffer from the last rendering, so performance becomes
2935 * effectively synchronous.  By emitting smaller batchbuffers, we eat some CPU
2936 * overhead to get better parallelism.
2937 */
2938static int
2939drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2940{
2941	drm_intel_bufmgr_gem *bufmgr_gem =
2942	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2943	unsigned int total = 0;
2944	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2945	int total_fences;
2946
2947	/* Check for fence reg constraints if necessary */
2948	if (bufmgr_gem->available_fences) {
2949		total_fences = drm_intel_gem_total_fences(bo_array, count);
2950		if (total_fences > bufmgr_gem->available_fences)
2951			return -ENOSPC;
2952	}
2953
2954	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2955
2956	if (total > threshold)
2957		total = drm_intel_gem_compute_batch_space(bo_array, count);
2958
2959	if (total > threshold) {
2960		DBG("check_space: overflowed available aperture, "
2961		    "%dkb vs %dkb\n",
2962		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
2963		return -ENOSPC;
2964	} else {
2965		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
2966		    (int)bufmgr_gem->gtt_size / 1024);
2967		return 0;
2968	}
2969}
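
/*
 * Example (illustrative sketch): the caller-side check described above.
 * "bos" holds the batch plus every buffer it will reference, and
 * flush_batch() is a hypothetical caller helper.
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bos, n_bos) != 0) {
 *		// Too much for one submission: flush now and rebuild the
 *		// batch with a smaller working set.
 *		flush_batch();
 *	}
 */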
2970
2971/*
2972 * Disable buffer reuse for objects which are shared with the kernel
2973 * as scanout buffers
2974 */
2975static int
2976drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
2977{
2978	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2979
2980	bo_gem->reusable = false;
2981	return 0;
2982}
2983
2984static int
2985drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2986{
2987	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2988
2989	return bo_gem->reusable;
2990}
2991
2992static int
2993_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2994{
2995	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2996	int i;
2997
2998	for (i = 0; i < bo_gem->reloc_count; i++) {
2999		if (bo_gem->reloc_target_info[i].bo == target_bo)
3000			return 1;
3001		if (bo == bo_gem->reloc_target_info[i].bo)
3002			continue;
3003		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
3004						target_bo))
3005			return 1;
3006	}
3007
3008	return 0;
3009}
3010
3011/** Return true if target_bo is referenced by bo's relocation tree. */
3012static int
3013drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
3014{
3015	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
3016
3017	if (bo == NULL || target_bo == NULL)
3018		return 0;
3019	if (target_bo_gem->used_as_reloc_target)
3020		return _drm_intel_gem_bo_references(bo, target_bo);
3021	return 0;
3022}
3023
3024static void
3025add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
3026{
3027	unsigned int i = bufmgr_gem->num_buckets;
3028
3029	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
3030
3031	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
3032	bufmgr_gem->cache_bucket[i].size = size;
3033	bufmgr_gem->num_buckets++;
3034}
3035
3036static void
3037init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
3038{
3039	unsigned long size, cache_max_size = 64 * 1024 * 1024;
3040
3041	/* OK, so power-of-two buckets were too wasteful of memory.
3042	 * Give 3 other sizes between each power of two, to hopefully
3043	 * cover things accurately enough.  (The alternative is
3044	 * probably to just go for exact matching of sizes, and assume
3045	 * that for things like composited window resize the tiled
3046	 * width/height alignment and rounding of sizes to pages will
3047	 * get us useful cache hit rates anyway)
3048	 */
3049	add_bucket(bufmgr_gem, 4096);
3050	add_bucket(bufmgr_gem, 4096 * 2);
3051	add_bucket(bufmgr_gem, 4096 * 3);
3052
3053	/* Initialize the linked lists for BO reuse cache. */
3054	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
3055		add_bucket(bufmgr_gem, size);
3056
3057		add_bucket(bufmgr_gem, size + size * 1 / 4);
3058		add_bucket(bufmgr_gem, size + size * 2 / 4);
3059		add_bucket(bufmgr_gem, size + size * 3 / 4);
3060	}
3061}
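
/*
 * With the sizes above, the buckets end up at 4, 8 and 12 KiB, and then,
 * from 16 KiB up to the 64 MiB cap, at every power of two plus the three
 * quarter-steps towards the next one (16, 20, 24, 28, 32, 40, 48, 56,
 * 64 KiB and so on).
 */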
3062
3063drm_public void
3064drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
3065{
3066	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3067
3068	bufmgr_gem->vma_max = limit;
3069
3070	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
3071}
3072
3073/**
3074 * Get the PCI ID for the device.  This can be overridden by setting the
3075 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
3076 */
3077static int
3078get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
3079{
3080	char *devid_override;
3081	int devid;
3082	int ret;
3083	drm_i915_getparam_t gp;
3084
3085	if (geteuid() == getuid()) {
3086		devid_override = getenv("INTEL_DEVID_OVERRIDE");
3087		if (devid_override) {
3088			bufmgr_gem->no_exec = true;
3089			return strtod(devid_override, NULL);
3090		}
3091	}
3092
3093	VG_CLEAR(devid);
3094	VG_CLEAR(gp);
3095	gp.param = I915_PARAM_CHIPSET_ID;
3096	gp.value = &devid;
3097	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3098	if (ret) {
3099		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
3100		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
3101	}
3102	return devid;
3103}
3104
3105drm_public int
3106drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
3107{
3108	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3109
3110	return bufmgr_gem->pci_device;
3111}
3112
3113/**
3114 * Sets the AUB filename.
3115 *
3116 * This function has to be called before drm_intel_bufmgr_gem_set_aub_dump()
3117 * for it to have any effect.
3118 */
3119drm_public void
3120drm_intel_bufmgr_gem_set_aub_filename(drm_intel_bufmgr *bufmgr,
3121				      const char *filename)
3122{
3123	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3124
3125	free(bufmgr_gem->aub_filename);
3126	if (filename)
3127		bufmgr_gem->aub_filename = strdup(filename);
3128}
3129
3130/**
3131 * Sets up AUB dumping.
3132 *
3133 * This is a trace file format that can be used with the simulator.
3134 * Packets are emitted in a format somewhat like GPU command packets.
3135 * You can set up a GTT and upload your objects into the referenced
3136 * space, then send off batchbuffers and get BMPs out the other end.
3137 */
3138drm_public void
3139drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
3140{
3141	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3142	int entry = 0x200003;
3143	int i;
3144	int gtt_size = 0x10000;
3145	const char *filename;
3146
3147	if (!enable) {
3148		if (bufmgr_gem->aub_file) {
3149			fclose(bufmgr_gem->aub_file);
3150			bufmgr_gem->aub_file = NULL;
3151		}
3152		return;
3153	}
3154
3155	if (geteuid() != getuid())
3156		return;
3157
3158	if (bufmgr_gem->aub_filename)
3159		filename = bufmgr_gem->aub_filename;
3160	else
3161		filename = "intel.aub";
3162	bufmgr_gem->aub_file = fopen(filename, "w+");
3163	if (!bufmgr_gem->aub_file)
3164		return;
3165
3166	/* Start allocating objects from just after the GTT. */
3167	bufmgr_gem->aub_offset = gtt_size;
3168
3169	/* Start with a (required) version packet. */
3170	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
3171	aub_out(bufmgr_gem,
3172		(4 << AUB_HEADER_MAJOR_SHIFT) |
3173		(0 << AUB_HEADER_MINOR_SHIFT));
3174	for (i = 0; i < 8; i++) {
3175		aub_out(bufmgr_gem, 0); /* app name */
3176	}
3177	aub_out(bufmgr_gem, 0); /* timestamp */
3178	aub_out(bufmgr_gem, 0); /* timestamp */
3179	aub_out(bufmgr_gem, 0); /* comment len */
3180
3181	/* Set up the GTT. The max we can handle is 256M */
3182	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
3183	/* Need to use GTT_ENTRY type for recent emulator */
3184	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_GTT_ENTRY | 0 | AUB_TRACE_OP_DATA_WRITE);
3185	aub_out(bufmgr_gem, 0); /* subtype */
3186	aub_out(bufmgr_gem, 0); /* offset */
3187	aub_out(bufmgr_gem, gtt_size); /* size */
3188	if (bufmgr_gem->gen >= 8)
3189		aub_out(bufmgr_gem, 0);
3190	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
3191		aub_out(bufmgr_gem, entry);
3192	}
3193}
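
/*
 * Example (illustrative sketch): capturing a run to an AUB file.  The
 * filename has to be set before dumping is enabled (see
 * drm_intel_bufmgr_gem_set_aub_filename() above).
 *
 *	drm_intel_bufmgr_gem_set_aub_filename(bufmgr, "frame.aub");
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);
 *	// ... render as usual; every exec is also written to the file ...
 *	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 0);
 */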
3194
3195drm_public drm_intel_context *
3196drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
3197{
3198	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3199	struct drm_i915_gem_context_create create;
3200	drm_intel_context *context = NULL;
3201	int ret;
3202
3203	context = calloc(1, sizeof(*context));
3204	if (!context)
3205		return NULL;
3206
3207	VG_CLEAR(create);
3208	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
3209	if (ret != 0) {
3210		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
3211		    strerror(errno));
3212		free(context);
3213		return NULL;
3214	}
3215
3216	context->ctx_id = create.ctx_id;
3217	context->bufmgr = bufmgr;
3218
3219	return context;
3220}
3221
3222drm_public void
3223drm_intel_gem_context_destroy(drm_intel_context *ctx)
3224{
3225	drm_intel_bufmgr_gem *bufmgr_gem;
3226	struct drm_i915_gem_context_destroy destroy;
3227	int ret;
3228
3229	if (ctx == NULL)
3230		return;
3231
3232	VG_CLEAR(destroy);
3233
3234	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3235	destroy.ctx_id = ctx->ctx_id;
3236	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
3237		       &destroy);
3238	if (ret != 0)
3239		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
3240			strerror(errno));
3241
3242	free(ctx);
3243}
3244
3245drm_public int
3246drm_intel_get_reset_stats(drm_intel_context *ctx,
3247			  uint32_t *reset_count,
3248			  uint32_t *active,
3249			  uint32_t *pending)
3250{
3251	drm_intel_bufmgr_gem *bufmgr_gem;
3252	struct drm_i915_reset_stats stats;
3253	int ret;
3254
3255	if (ctx == NULL)
3256		return -EINVAL;
3257
3258	memset(&stats, 0, sizeof(stats));
3259
3260	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
3261	stats.ctx_id = ctx->ctx_id;
3262	ret = drmIoctl(bufmgr_gem->fd,
3263		       DRM_IOCTL_I915_GET_RESET_STATS,
3264		       &stats);
3265	if (ret == 0) {
3266		if (reset_count != NULL)
3267			*reset_count = stats.reset_count;
3268
3269		if (active != NULL)
3270			*active = stats.batch_active;
3271
3272		if (pending != NULL)
3273			*pending = stats.batch_pending;
3274	}
3275
3276	return ret;
3277}
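
/*
 * Example (illustrative sketch): asking whether this context's batches
 * have been involved in a GPU hang since the last query.
 *
 *	uint32_t resets, active, pending;
 *	if (drm_intel_get_reset_stats(ctx, &resets, &active, &pending) == 0 &&
 *	    (active || pending)) {
 *		// our work was caught up in a hang; recreate GPU state
 *	}
 */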
3278
3279drm_public int
3280drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
3281		   uint32_t offset,
3282		   uint64_t *result)
3283{
3284	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3285	struct drm_i915_reg_read reg_read;
3286	int ret;
3287
3288	VG_CLEAR(reg_read);
3289	reg_read.offset = offset;
3290
3291	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
3292
3293	*result = reg_read.val;
3294	return ret;
3295}
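
/*
 * Example (illustrative sketch): reading a register.  The kernel only
 * whitelists a handful of offsets; the render ring timestamp register
 * (offset 0x2358 here) is assumed to be one of them, so adjust for the
 * target kernel and hardware.
 *
 *	uint64_t ts = 0;
 *	if (drm_intel_reg_read(bufmgr, 0x2358, &ts) == 0) {
 *		// ts now holds the GPU timestamp counter value
 *	}
 */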
3296
3297
3298/**
3299 * Annotate the given bo for use in aub dumping.
3300 *
3301 * \param annotations is an array of drm_intel_aub_annotation objects
3302 * describing the type of data in various sections of the bo.  Each
3303 * element of the array specifies the type and subtype of a section of
3304 * the bo, and the past-the-end offset of that section.  The elements
3305 * of \c annotations must be sorted so that ending_offset is
3306 * increasing.
3307 *
3308 * \param count is the number of elements in the \c annotations array.
3309 * If \c count is zero, then \c annotations will not be dereferenced.
3310 *
3311 * Annotations are copied into a private data structure, so caller may
3312 * re-use the memory pointed to by \c annotations after the call
3313 * returns.
3314 *
3315 * Annotations are stored for the lifetime of the bo; to reset to the
3316 * default state (no annotations), call this function with a \c count
3317 * of zero.
3318 */
3319drm_public void
3320drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3321					 drm_intel_aub_annotation *annotations,
3322					 unsigned count)
3323{
3324	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3325	unsigned size = sizeof(*annotations) * count;
3326	drm_intel_aub_annotation *new_annotations =
3327		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
3328	if (new_annotations == NULL) {
3329		free(bo_gem->aub_annotations);
3330		bo_gem->aub_annotations = NULL;
3331		bo_gem->aub_annotation_count = 0;
3332		return;
3333	}
3334	memcpy(new_annotations, annotations, size);
3335	bo_gem->aub_annotations = new_annotations;
3336	bo_gem->aub_annotation_count = count;
3337}
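
/*
 * Example (illustrative sketch): annotating a batch buffer before an AUB
 * dump so the decoder knows where the command stream ends.  This mirrors
 * what aub_exec() does when no annotations were provided.
 *
 *	drm_intel_aub_annotation notes[2] = {
 *		{ AUB_TRACE_TYPE_BATCH, 0, used_bytes },
 *		{ AUB_TRACE_TYPE_NOTYPE, 0, batch->size },
 *	};
 *	drm_intel_bufmgr_gem_set_aub_annotations(batch, notes, 2);
 */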
3338
3339static pthread_mutex_t bufmgr_list_mutex = PTHREAD_MUTEX_INITIALIZER;
3340static drmMMListHead bufmgr_list = { &bufmgr_list, &bufmgr_list };
3341
3342static drm_intel_bufmgr_gem *
3343drm_intel_bufmgr_gem_find(int fd)
3344{
3345	drm_intel_bufmgr_gem *bufmgr_gem;
3346
3347	DRMLISTFOREACHENTRY(bufmgr_gem, &bufmgr_list, managers) {
3348		if (bufmgr_gem->fd == fd) {
3349			atomic_inc(&bufmgr_gem->refcount);
3350			return bufmgr_gem;
3351		}
3352	}
3353
3354	return NULL;
3355}
3356
3357static void
3358drm_intel_bufmgr_gem_unref(drm_intel_bufmgr *bufmgr)
3359{
3360	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
3361
3362	if (atomic_add_unless(&bufmgr_gem->refcount, -1, 1)) {
3363		pthread_mutex_lock(&bufmgr_list_mutex);
3364
3365		if (atomic_dec_and_test(&bufmgr_gem->refcount)) {
3366			DRMLISTDEL(&bufmgr_gem->managers);
3367			drm_intel_bufmgr_gem_destroy(bufmgr);
3368		}
3369
3370		pthread_mutex_unlock(&bufmgr_list_mutex);
3371	}
3372}
3373
3374static bool
3375has_userptr(drm_intel_bufmgr_gem *bufmgr_gem)
3376{
3377	int ret;
3378	void *ptr;
3379	long pgsz;
3380	struct drm_i915_gem_userptr userptr;
3381	struct drm_gem_close close_bo;
3382
3383	pgsz = sysconf(_SC_PAGESIZE);
3384	assert(pgsz > 0);
3385
3386	ret = posix_memalign(&ptr, pgsz, pgsz);
3387	if (ret) {
3388		DBG("Failed to get a page (%ld) for userptr detection!\n",
3389			pgsz);
3390		return false;
3391	}
3392
3393	memset(&userptr, 0, sizeof(userptr));
3394	userptr.user_ptr = (__u64)(unsigned long)ptr;
3395	userptr.user_size = pgsz;
3396
3397retry:
3398	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_USERPTR, &userptr);
3399	if (ret) {
3400		if (errno == ENODEV && userptr.flags == 0) {
3401			userptr.flags = I915_USERPTR_UNSYNCHRONIZED;
3402			goto retry;
3403		}
3404		free(ptr);
3405		return false;
3406	}
3407
3408	close_bo.handle = userptr.handle;
3409	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close_bo);
3410	free(ptr);
3411	if (ret) {
3412		fprintf(stderr, "Failed to release test userptr object! (%d) "
3413				"i915 kernel driver may not be sane!\n", errno);
3414		return false;
3415	}
3416
3417	return true;
3418}
3419
3420/**
3421 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3422 * and manage buffer objects.
3423 *
3424 * \param fd File descriptor of the opened DRM device.
3425 */
3426drm_public drm_intel_bufmgr *
3427drm_intel_bufmgr_gem_init(int fd, int batch_size)
3428{
3429	drm_intel_bufmgr_gem *bufmgr_gem;
3430	struct drm_i915_gem_get_aperture aperture;
3431	drm_i915_getparam_t gp;
3432	int ret, tmp;
3433	bool exec2 = false;
3434
3435	pthread_mutex_lock(&bufmgr_list_mutex);
3436
3437	bufmgr_gem = drm_intel_bufmgr_gem_find(fd);
3438	if (bufmgr_gem)
3439		goto exit;
3440
3441	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3442	if (bufmgr_gem == NULL)
3443		goto exit;
3444
3445	bufmgr_gem->fd = fd;
3446	atomic_set(&bufmgr_gem->refcount, 1);
3447
3448	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3449		free(bufmgr_gem);
3450		bufmgr_gem = NULL;
3451		goto exit;
3452	}
3453
3454	ret = drmIoctl(bufmgr_gem->fd,
3455		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3456		       &aperture);
3457
3458	if (ret == 0)
3459		bufmgr_gem->gtt_size = aperture.aper_available_size;
3460	else {
3461		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
3462			strerror(errno));
3463		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3464		fprintf(stderr, "Assuming %dkB available aperture size.\n"
3465			"May lead to reduced performance or incorrect "
3466			"rendering.\n",
3467			(int)bufmgr_gem->gtt_size / 1024);
3468	}
3469
3470	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3471
3472	if (IS_GEN2(bufmgr_gem->pci_device))
3473		bufmgr_gem->gen = 2;
3474	else if (IS_GEN3(bufmgr_gem->pci_device))
3475		bufmgr_gem->gen = 3;
3476	else if (IS_GEN4(bufmgr_gem->pci_device))
3477		bufmgr_gem->gen = 4;
3478	else if (IS_GEN5(bufmgr_gem->pci_device))
3479		bufmgr_gem->gen = 5;
3480	else if (IS_GEN6(bufmgr_gem->pci_device))
3481		bufmgr_gem->gen = 6;
3482	else if (IS_GEN7(bufmgr_gem->pci_device))
3483		bufmgr_gem->gen = 7;
3484	else if (IS_GEN8(bufmgr_gem->pci_device))
3485		bufmgr_gem->gen = 8;
3486	else if (IS_GEN9(bufmgr_gem->pci_device))
3487		bufmgr_gem->gen = 9;
3488	else {
3489		free(bufmgr_gem);
3490		bufmgr_gem = NULL;
3491		goto exit;
3492	}
3493
3494	if (IS_GEN3(bufmgr_gem->pci_device) &&
3495	    bufmgr_gem->gtt_size > 256*1024*1024) {
3496		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3497		 * be used for tiled blits. To simplify the accounting, just
3498		 * subtract the unmappable part (fixed to 256MB on all known
3499		 * gen3 devices) if the kernel advertises it. */
3500		bufmgr_gem->gtt_size -= 256*1024*1024;
3501	}
3502
3503	VG_CLEAR(gp);
3504	gp.value = &tmp;
3505
3506	gp.param = I915_PARAM_HAS_EXECBUF2;
3507	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3508	if (!ret)
3509		exec2 = true;
3510
3511	gp.param = I915_PARAM_HAS_BSD;
3512	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3513	bufmgr_gem->has_bsd = ret == 0;
3514
3515	gp.param = I915_PARAM_HAS_BLT;
3516	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3517	bufmgr_gem->has_blt = ret == 0;
3518
3519	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3520	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3521	bufmgr_gem->has_relaxed_fencing = ret == 0;
3522
3523	if (has_userptr(bufmgr_gem))
3524		bufmgr_gem->bufmgr.bo_alloc_userptr =
3525			drm_intel_gem_bo_alloc_userptr;
3526
3527	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3528	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3529	bufmgr_gem->has_wait_timeout = ret == 0;
3530
3531	gp.param = I915_PARAM_HAS_LLC;
3532	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3533	if (ret != 0) {
3534		/* Kernel does not support the HAS_LLC query; fall back to GPU
3535		 * generation detection and assume that GEN6/7 have LLC.
3536		 */
3537		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) ||
3538				IS_GEN7(bufmgr_gem->pci_device));
3539	} else
3540		bufmgr_gem->has_llc = *gp.value;
3541
3542	gp.param = I915_PARAM_HAS_VEBOX;
3543	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3544	bufmgr_gem->has_vebox = (ret == 0) && (*gp.value > 0);
3545
3546	if (bufmgr_gem->gen < 4) {
3547		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3548		gp.value = &bufmgr_gem->available_fences;
3549		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3550		if (ret) {
3551			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3552				errno);
3553			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3554				*gp.value);
3555			bufmgr_gem->available_fences = 0;
3556		} else {
3557			/* XXX The kernel reports the total number of fences,
3558			 * including any that may be pinned.
3559			 *
3560			 * We presume that there will be at least one pinned
3561			 * fence for the scanout buffer, but there may be more
3562			 * than one scanout and the user may be manually
3563			 * pinning buffers. Let's move to execbuffer2 and
3564			 * thereby forget the insanity of using fences...
3565			 */
3566			bufmgr_gem->available_fences -= 2;
3567			if (bufmgr_gem->available_fences < 0)
3568				bufmgr_gem->available_fences = 0;
3569		}
3570	}
3571
3572	/* Let's go with one relocation for every 2 dwords (but round down a bit
3573	 * since a power of two will mean an extra page allocation for the reloc
3574	 * buffer).
3575	 *
3576	 * Every 4 was too few for the blender benchmark.
3577	 */
3578	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3579
3580	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
3581	bufmgr_gem->bufmgr.bo_alloc_for_render =
3582	    drm_intel_gem_bo_alloc_for_render;
3583	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3584	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3585	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3586	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3587	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3588	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3589	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3590	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3591	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3592	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3593	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3594	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3595	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3596	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3597	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3598	/* Use the new one if available */
3599	if (exec2) {
3600		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3601		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3602	} else
3603		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
3604	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3605	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3606	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_unref;
3607	bufmgr_gem->bufmgr.debug = 0;
3608	bufmgr_gem->bufmgr.check_aperture_space =
3609	    drm_intel_gem_check_aperture_space;
3610	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3611	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3612	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3613	    drm_intel_gem_get_pipe_from_crtc_id;
3614	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3615
3616	DRMINITLISTHEAD(&bufmgr_gem->named);
3617	init_cache_buckets(bufmgr_gem);
3618
3619	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3620	bufmgr_gem->vma_max = -1; /* unlimited by default */
3621
3622	DRMLISTADD(&bufmgr_gem->managers, &bufmgr_list);
3623
3624exit:
3625	pthread_mutex_unlock(&bufmgr_list_mutex);
3626
3627	return bufmgr_gem != NULL ? &bufmgr_gem->bufmgr : NULL;
3628}
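
/*
 * Example (illustrative sketch): minimal initialization against an opened
 * DRM device node; the device path and batch size are assumptions of the
 * caller.
 *
 *	int fd = open("/dev/dri/card0", O_RDWR);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	if (bufmgr)
 *		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	// ... allocate bos, build batches, exec ...
 *	drm_intel_bufmgr_destroy(bufmgr);
 */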
3629