intel_bufmgr_gem.c revision 13e8270504cffa96bd067dc5c792a79555e8b2d4
1/**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#ifdef HAVE_CONFIG_H
38#include "config.h"
39#endif
40
41#include <xf86drm.h>
42#include <xf86atomic.h>
43#include <fcntl.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <assert.h>
49#include <pthread.h>
50#include <sys/ioctl.h>
51#include <sys/mman.h>
52#include <sys/stat.h>
53#include <sys/types.h>
54
55#include "errno.h"
56#include "libdrm_lists.h"
57#include "intel_bufmgr.h"
58#include "intel_bufmgr_priv.h"
59#include "intel_chipset.h"
60#include "string.h"
61
62#include "i915_drm.h"
63
64#define DBG(...) do {					\
65	if (bufmgr_gem->bufmgr.debug)			\
66		fprintf(stderr, __VA_ARGS__);		\
67} while (0)
68
69#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
70
71typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
72
73struct drm_intel_gem_bo_bucket {
74	drmMMListHead head;
75	unsigned long size;
76};
77
78typedef struct _drm_intel_bufmgr_gem {
79	drm_intel_bufmgr bufmgr;
80
81	int fd;
82
83	int max_relocs;
84
85	pthread_mutex_t lock;
86
87	struct drm_i915_gem_exec_object *exec_objects;
88	struct drm_i915_gem_exec_object2 *exec2_objects;
89	drm_intel_bo **exec_bos;
90	int exec_size;
91	int exec_count;
92
93	/** Array of lists of cached gem objects of power-of-two sizes */
94	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
95	int num_buckets;
96	time_t time;
97
98	uint64_t gtt_size;
99	int available_fences;
100	int pci_device;
101	int gen;
102	char bo_reuse;
103	char fenced_relocs;
104} drm_intel_bufmgr_gem;
105
106#define DRM_INTEL_RELOC_FENCE (1<<0)
107
108typedef struct _drm_intel_reloc_target_info {
109	drm_intel_bo *bo;
110	int flags;
111} drm_intel_reloc_target;
112
113struct _drm_intel_bo_gem {
114	drm_intel_bo bo;
115
116	atomic_t refcount;
117	uint32_t gem_handle;
118	const char *name;
119
120	/**
121	 * Kernel-assigned global name for this object
122	 */
123	unsigned int global_name;
124
125	/**
126	 * Index of the buffer within the validation list while preparing a
127	 * batchbuffer execution.
128	 */
129	int validate_index;
130
131	/**
132	 * Current tiling mode
133	 */
134	uint32_t tiling_mode;
135	uint32_t swizzle_mode;
136	unsigned long stride;
137
138	time_t free_time;
139
140	/** Array passed to the DRM containing relocation information. */
141	struct drm_i915_gem_relocation_entry *relocs;
142	/**
143	 * Array of info structs corresponding to relocs[i].target_handle, etc.
144	 */
145	drm_intel_reloc_target *reloc_target_info;
146	/** Number of entries in relocs */
147	int reloc_count;
148	/** Mapped address for the buffer, saved across map/unmap cycles */
149	void *mem_virtual;
150	/** GTT virtual address for the buffer, saved across map/unmap cycles */
151	void *gtt_virtual;
152
153	/** BO cache list */
154	drmMMListHead head;
155
156	/**
157	 * Boolean of whether this BO and its children have been included in
158	 * the current drm_intel_bufmgr_check_aperture_space() total.
159	 */
160	char included_in_check_aperture;
161
162	/**
163	 * Boolean of whether this buffer has been used as a relocation
164	 * target and had its size accounted for, and thus can't have any
165	 * further relocations added to it.
166	 */
167	char used_as_reloc_target;
168
169	/**
170	 * Boolean of whether we have encountered an error whilst building the relocation tree.
171	 */
172	char has_error;
173
174	/**
175	 * Boolean of whether this buffer can be re-used
176	 */
177	char reusable;
178
179	/**
180	 * Size in bytes of this buffer and its relocation descendants.
181	 *
182	 * Used to avoid costly tree walking in
183	 * drm_intel_bufmgr_check_aperture in the common case.
184	 */
185	int reloc_tree_size;
186
187	/**
188	 * Number of potential fence registers required by this buffer and its
189	 * relocations.
190	 */
191	int reloc_tree_fences;
192};
193
194static unsigned int
195drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
196
197static unsigned int
198drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
199
200static int
201drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
202			    uint32_t * swizzle_mode);
203
204static int
205drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
206				     uint32_t tiling_mode,
207				     uint32_t stride);
208
209static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
210						      time_t time);
211
212static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
213
214static void drm_intel_gem_bo_free(drm_intel_bo *bo);
215
216static unsigned long
217drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
218			   uint32_t *tiling_mode)
219{
220	unsigned long min_size, max_size;
221	unsigned long i;
222
223	if (*tiling_mode == I915_TILING_NONE)
224		return size;
225
226	/* 965+ just need multiples of page size for tiling */
227	if (bufmgr_gem->gen >= 4)
228		return ROUND_UP_TO(size, 4096);
229
230	/* Older chips need powers of two, at least 1MB (gen3) or 512KB (gen2) */
231	if (bufmgr_gem->gen == 3) {
232		min_size = 1024*1024;
233		max_size = 128*1024*1024;
234	} else {
235		min_size = 512*1024;
236		max_size = 64*1024*1024;
237	}
238
239	if (size > max_size) {
240		*tiling_mode = I915_TILING_NONE;
241		return size;
242	}
243
244	for (i = min_size; i < size; i <<= 1)
245		;
246
247	return i;
248}
249
250/*
251 * Round a given pitch up to the minimum required for X tiling on a
252 * given chip.  We use 512 as the minimum to allow for a later tiling
253 * change.
254 */
255static unsigned long
256drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
257			    unsigned long pitch, uint32_t tiling_mode)
258{
259	unsigned long tile_width;
260	unsigned long i;
261
262	/* If untiled, then just align it so that we can do rendering
263	 * to it with the 3D engine.
264	 */
265	if (tiling_mode == I915_TILING_NONE)
266		return ALIGN(pitch, 64);
267
268	if (tiling_mode == I915_TILING_X)
269		tile_width = 512;
270	else
271		tile_width = 128;
272
273	/* 965 is flexible */
274	if (bufmgr_gem->gen >= 4)
275		return ROUND_UP_TO(pitch, tile_width);
276
277	/* Pre-965 needs power of two tile width */
278	for (i = tile_width; i < pitch; i <<= 1)
279		;
280
281	return i;
282}
283
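/*
 * Worked example (illustrative note, not in the original file): with the two
 * helpers above, a hypothetical 300-pixel-wide ARGB surface (cpp = 4) has a
 * raw pitch of 1200 bytes.  For I915_TILING_X that is rounded up to 1536
 * bytes (the next multiple of the 512-byte tile width) on 965 and newer, but
 * to 2048 bytes (the next power of two) on older chipsets.
 */
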
284static struct drm_intel_gem_bo_bucket *
285drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
286				 unsigned long size)
287{
288	int i;
289
290	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
291		struct drm_intel_gem_bo_bucket *bucket =
292		    &bufmgr_gem->cache_bucket[i];
293		if (bucket->size >= size) {
294			return bucket;
295		}
296	}
297
298	return NULL;
299}
300
301static void
302drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
303{
304	int i, j;
305
306	for (i = 0; i < bufmgr_gem->exec_count; i++) {
307		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
308		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
309
310		if (bo_gem->relocs == NULL) {
311			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
312			    bo_gem->name);
313			continue;
314		}
315
316		for (j = 0; j < bo_gem->reloc_count; j++) {
317			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
318			drm_intel_bo_gem *target_gem =
319			    (drm_intel_bo_gem *) target_bo;
320
321			DBG("%2d: %d (%s)@0x%08llx -> "
322			    "%d (%s)@0x%08lx + 0x%08x\n",
323			    i,
324			    bo_gem->gem_handle, bo_gem->name,
325			    (unsigned long long)bo_gem->relocs[j].offset,
326			    target_gem->gem_handle,
327			    target_gem->name,
328			    target_bo->offset,
329			    bo_gem->relocs[j].delta);
330		}
331	}
332}
333
334static inline void
335drm_intel_gem_bo_reference(drm_intel_bo *bo)
336{
337	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
338
339	assert(atomic_read(&bo_gem->refcount) > 0);
340	atomic_inc(&bo_gem->refcount);
341}
342
343/**
344 * Adds the given buffer to the list of buffers to be validated (moved into the
345 * appropriate memory type) with the next batch submission.
346 *
347 * If a buffer is validated multiple times in a batch submission, it ends up
348 * with the intersection of the memory type flags and the union of the
349 * access flags.
350 */
351static void
352drm_intel_add_validate_buffer(drm_intel_bo *bo)
353{
354	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
355	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
356	int index;
357
358	if (bo_gem->validate_index != -1)
359		return;
360
361	/* Extend the array of validation entries as necessary. */
362	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
363		int new_size = bufmgr_gem->exec_size * 2;
364
365		if (new_size == 0)
366			new_size = 5;
367
368		bufmgr_gem->exec_objects =
369		    realloc(bufmgr_gem->exec_objects,
370			    sizeof(*bufmgr_gem->exec_objects) * new_size);
371		bufmgr_gem->exec_bos =
372		    realloc(bufmgr_gem->exec_bos,
373			    sizeof(*bufmgr_gem->exec_bos) * new_size);
374		bufmgr_gem->exec_size = new_size;
375	}
376
377	index = bufmgr_gem->exec_count;
378	bo_gem->validate_index = index;
379	/* Fill in array entry */
380	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
381	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
382	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
383	bufmgr_gem->exec_objects[index].alignment = 0;
384	bufmgr_gem->exec_objects[index].offset = 0;
385	bufmgr_gem->exec_bos[index] = bo;
386	bufmgr_gem->exec_count++;
387}
388
389static void
390drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
391{
392	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
393	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
394	int index;
395
396	if (bo_gem->validate_index != -1) {
397		if (need_fence)
398			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
399				EXEC_OBJECT_NEEDS_FENCE;
400		return;
401	}
402
403	/* Extend the array of validation entries as necessary. */
404	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
405		int new_size = bufmgr_gem->exec_size * 2;
406
407		if (new_size == 0)
408			new_size = 5;
409
410		bufmgr_gem->exec2_objects =
411			realloc(bufmgr_gem->exec2_objects,
412				sizeof(*bufmgr_gem->exec2_objects) * new_size);
413		bufmgr_gem->exec_bos =
414			realloc(bufmgr_gem->exec_bos,
415				sizeof(*bufmgr_gem->exec_bos) * new_size);
416		bufmgr_gem->exec_size = new_size;
417	}
418
419	index = bufmgr_gem->exec_count;
420	bo_gem->validate_index = index;
421	/* Fill in array entry */
422	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
423	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
424	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
425	bufmgr_gem->exec2_objects[index].alignment = 0;
426	bufmgr_gem->exec2_objects[index].offset = 0;
427	bufmgr_gem->exec_bos[index] = bo;
428	bufmgr_gem->exec2_objects[index].flags = 0;
429	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
430	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
431	if (need_fence) {
432		bufmgr_gem->exec2_objects[index].flags |=
433			EXEC_OBJECT_NEEDS_FENCE;
434	}
435	bufmgr_gem->exec_count++;
436}
437
438#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
439	sizeof(uint32_t))
440
441static void
442drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
443				      drm_intel_bo_gem *bo_gem)
444{
445	int size;
446
447	assert(!bo_gem->used_as_reloc_target);
448
449	/* The older chipsets are far less flexible in terms of tiling,
450	 * and require tiled buffers to be size-aligned in the aperture.
451	 * This means that in the worst possible case we will need a hole
452	 * twice as large as the object in order for it to fit into the
453	 * aperture. Optimal packing is for wimps.
454	 */
455	size = bo_gem->bo.size;
456	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE)
457		size *= 2;
458
459	bo_gem->reloc_tree_size = size;
460}
461
462static int
463drm_intel_setup_reloc_list(drm_intel_bo *bo)
464{
465	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
466	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
467	unsigned int max_relocs = bufmgr_gem->max_relocs;
468
469	if (bo->size / 4 < max_relocs)
470		max_relocs = bo->size / 4;
471
472	bo_gem->relocs = malloc(max_relocs *
473				sizeof(struct drm_i915_gem_relocation_entry));
474	bo_gem->reloc_target_info = malloc(max_relocs *
475					   sizeof(drm_intel_reloc_target));
476	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
477		bo_gem->has_error = 1;
478
479		free (bo_gem->relocs);
480		bo_gem->relocs = NULL;
481
482		free (bo_gem->reloc_target_info);
483		bo_gem->reloc_target_info = NULL;
484
485		return 1;
486	}
487
488	return 0;
489}
490
491static int
492drm_intel_gem_bo_busy(drm_intel_bo *bo)
493{
494	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
495	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
496	struct drm_i915_gem_busy busy;
497	int ret;
498
499	memset(&busy, 0, sizeof(busy));
500	busy.handle = bo_gem->gem_handle;
501
502	do {
503		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
504	} while (ret == -1 && errno == EINTR);
505
506	return (ret == 0 && busy.busy);
507}
508
509static int
510drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
511				  drm_intel_bo_gem *bo_gem, int state)
512{
513	struct drm_i915_gem_madvise madv;
514
515	madv.handle = bo_gem->gem_handle;
516	madv.madv = state;
517	madv.retained = 1;
518	ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
519
520	return madv.retained;
521}
522
523static int
524drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
525{
526	return drm_intel_gem_bo_madvise_internal
527		((drm_intel_bufmgr_gem *) bo->bufmgr,
528		 (drm_intel_bo_gem *) bo,
529		 madv);
530}
531
532/* drop the oldest entries that have been purged by the kernel */
533static void
534drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
535				    struct drm_intel_gem_bo_bucket *bucket)
536{
537	while (!DRMLISTEMPTY(&bucket->head)) {
538		drm_intel_bo_gem *bo_gem;
539
540		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
541				      bucket->head.next, head);
542		if (drm_intel_gem_bo_madvise_internal
543		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
544			break;
545
546		DRMLISTDEL(&bo_gem->head);
547		drm_intel_gem_bo_free(&bo_gem->bo);
548	}
549}
550
551static drm_intel_bo *
552drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
553				const char *name,
554				unsigned long size,
555				unsigned long flags,
556				uint32_t tiling_mode,
557				unsigned long stride)
558{
559	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
560	drm_intel_bo_gem *bo_gem;
561	unsigned int page_size = getpagesize();
562	int ret;
563	struct drm_intel_gem_bo_bucket *bucket;
564	int alloc_from_cache;
565	unsigned long bo_size;
566	int for_render = 0;
567
568	if (flags & BO_ALLOC_FOR_RENDER)
569		for_render = 1;
570
571	/* Round the allocated size up to a power of two number of pages. */
572	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
573
574	/* If we don't have caching at this size, don't actually round the
575	 * allocation up.
576	 */
577	if (bucket == NULL) {
578		bo_size = size;
579		if (bo_size < page_size)
580			bo_size = page_size;
581	} else {
582		bo_size = bucket->size;
583	}
584
585	pthread_mutex_lock(&bufmgr_gem->lock);
586	/* Get a buffer out of the cache if available */
587retry:
588	alloc_from_cache = 0;
589	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
590		if (for_render) {
591			/* Allocate new render-target BOs from the tail (MRU)
592			 * of the list, as it will likely be hot in the GPU
593			 * cache and in the aperture for us.
594			 */
595			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
596					      bucket->head.prev, head);
597			DRMLISTDEL(&bo_gem->head);
598			alloc_from_cache = 1;
599		} else {
600			/* For non-render-target BOs (where we're probably
601			 * going to map it first thing in order to fill it
602			 * with data), check if the last BO in the cache is
603			 * unbusy, and only reuse in that case. Otherwise,
604			 * allocating a new buffer is probably faster than
605			 * waiting for the GPU to finish.
606			 */
607			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
608					      bucket->head.next, head);
609			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
610				alloc_from_cache = 1;
611				DRMLISTDEL(&bo_gem->head);
612			}
613		}
614
615		if (alloc_from_cache) {
616			if (!drm_intel_gem_bo_madvise_internal
617			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
618				drm_intel_gem_bo_free(&bo_gem->bo);
619				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
620								    bucket);
621				goto retry;
622			}
623
624			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
625								 tiling_mode,
626								 stride)) {
627				drm_intel_gem_bo_free(&bo_gem->bo);
628				goto retry;
629			}
630		}
631	}
632	pthread_mutex_unlock(&bufmgr_gem->lock);
633
634	if (!alloc_from_cache) {
635		struct drm_i915_gem_create create;
636
637		bo_gem = calloc(1, sizeof(*bo_gem));
638		if (!bo_gem)
639			return NULL;
640
641		bo_gem->bo.size = bo_size;
642		memset(&create, 0, sizeof(create));
643		create.size = bo_size;
644
645		do {
646			ret = ioctl(bufmgr_gem->fd,
647				    DRM_IOCTL_I915_GEM_CREATE,
648				    &create);
649		} while (ret == -1 && errno == EINTR);
650		bo_gem->gem_handle = create.handle;
651		bo_gem->bo.handle = bo_gem->gem_handle;
652		if (ret != 0) {
653			free(bo_gem);
654			return NULL;
655		}
656		bo_gem->bo.bufmgr = bufmgr;
657
658		bo_gem->tiling_mode = I915_TILING_NONE;
659		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
660		bo_gem->stride = 0;
661
662		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
663							 tiling_mode,
664							 stride)) {
665		    drm_intel_gem_bo_free(&bo_gem->bo);
666		    return NULL;
667		}
668	}
669
670	bo_gem->name = name;
671	atomic_set(&bo_gem->refcount, 1);
672	bo_gem->validate_index = -1;
673	bo_gem->reloc_tree_fences = 0;
674	bo_gem->used_as_reloc_target = 0;
675	bo_gem->has_error = 0;
676	bo_gem->reusable = 1;
677
678	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
679
680	DBG("bo_create: buf %d (%s) %ldb\n",
681	    bo_gem->gem_handle, bo_gem->name, size);
682
683	return &bo_gem->bo;
684}
685
686static drm_intel_bo *
687drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
688				  const char *name,
689				  unsigned long size,
690				  unsigned int alignment)
691{
692	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
693					       BO_ALLOC_FOR_RENDER,
694					       I915_TILING_NONE, 0);
695}
696
697static drm_intel_bo *
698drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
699		       const char *name,
700		       unsigned long size,
701		       unsigned int alignment)
702{
703	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
704					       I915_TILING_NONE, 0);
705}
706
707static drm_intel_bo *
708drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
709			     int x, int y, int cpp, uint32_t *tiling_mode,
710			     unsigned long *pitch, unsigned long flags)
711{
712	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
713	drm_intel_bo *bo;
714	unsigned long size, stride;
715	uint32_t tiling;
716
717	do {
718		unsigned long aligned_y;
719
720		tiling = *tiling_mode;
721
722		/* If we're tiled, our allocations are in 8 or 32-row blocks,
723		 * so failure to align our height means that we won't allocate
724		 * enough pages.
725		 *
726		 * If we're untiled, we still have to align to 2 rows high
727		 * because the data port accesses 2x2 blocks even if the
728		 * bottom row isn't to be rendered, so failure to align means
729		 * we could walk off the end of the GTT and fault.  This is
730		 * documented on 965, and may be the case on older chipsets
731		 * too so we try to be careful.
732		 */
733		aligned_y = y;
734		if (tiling == I915_TILING_NONE)
735			aligned_y = ALIGN(y, 2);
736		else if (tiling == I915_TILING_X)
737			aligned_y = ALIGN(y, 8);
738		else if (tiling == I915_TILING_Y)
739			aligned_y = ALIGN(y, 32);
740
741		stride = x * cpp;
742		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling);
743		size = stride * aligned_y;
744		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
745	} while (*tiling_mode != tiling);
746
747	if (*tiling_mode == I915_TILING_NONE)
748		stride = 0;
749
750	bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
751					     *tiling_mode, stride);
752	if (!bo)
753		return NULL;
754
755	*pitch = stride;
756	return bo;
757}
758
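/*
 * Usage sketch (illustrative, not in the original file): callers normally go
 * through the drm_intel_bo_alloc_tiled() wrapper declared in intel_bufmgr.h.
 * Both *tiling_mode and *pitch are in/out parameters, so the caller must
 * check what was actually granted, e.g.:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *bo = drm_intel_bo_alloc_tiled(bufmgr, "front buffer",
 *						    width, height, 4,
 *						    &tiling, &pitch, 0);
 *	if (bo == NULL || tiling != I915_TILING_X)
 *		... fall back to an untiled path ...
 */
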
759/**
760 * Returns a drm_intel_bo wrapping the given buffer object handle.
761 *
762 * This can be used when one application needs to pass a buffer object
763 * to another.
764 */
765drm_intel_bo *
766drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
767				  const char *name,
768				  unsigned int handle)
769{
770	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
771	drm_intel_bo_gem *bo_gem;
772	int ret;
773	struct drm_gem_open open_arg;
774	struct drm_i915_gem_get_tiling get_tiling;
775
776	bo_gem = calloc(1, sizeof(*bo_gem));
777	if (!bo_gem)
778		return NULL;
779
780	memset(&open_arg, 0, sizeof(open_arg));
781	open_arg.name = handle;
782	do {
783		ret = ioctl(bufmgr_gem->fd,
784			    DRM_IOCTL_GEM_OPEN,
785			    &open_arg);
786	} while (ret == -1 && errno == EINTR);
787	if (ret != 0) {
788		fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
789			name, handle, strerror(errno));
790		free(bo_gem);
791		return NULL;
792	}
793	bo_gem->bo.size = open_arg.size;
794	bo_gem->bo.offset = 0;
795	bo_gem->bo.virtual = NULL;
796	bo_gem->bo.bufmgr = bufmgr;
797	bo_gem->name = name;
798	atomic_set(&bo_gem->refcount, 1);
799	bo_gem->validate_index = -1;
800	bo_gem->gem_handle = open_arg.handle;
801	bo_gem->global_name = handle;
802	bo_gem->reusable = 0;
803
804	memset(&get_tiling, 0, sizeof(get_tiling));
805	get_tiling.handle = bo_gem->gem_handle;
806	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
807	if (ret != 0) {
808		drm_intel_gem_bo_unreference(&bo_gem->bo);
809		return NULL;
810	}
811	bo_gem->tiling_mode = get_tiling.tiling_mode;
812	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
813	/* XXX stride is unknown */
814	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
815
816	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
817
818	return &bo_gem->bo;
819}
820
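/*
 * Sharing sketch (illustrative, not in the original file): the global name
 * consumed here is usually produced in another process via
 * drm_intel_bo_flink() (the flink path is implemented further down in this
 * file), e.g.:
 *
 *	exporting process:
 *		uint32_t name;
 *		drm_intel_bo_flink(bo, &name);
 *		...pass "name" over DRI2 or other IPC...
 *
 *	importing process:
 *		drm_intel_bo *shared =
 *			drm_intel_bo_gem_create_from_name(bufmgr, "tex", name);
 */
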
821static void
822drm_intel_gem_bo_free(drm_intel_bo *bo)
823{
824	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
825	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
826	struct drm_gem_close close;
827	int ret;
828
829	if (bo_gem->mem_virtual)
830		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
831	if (bo_gem->gtt_virtual)
832		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
833
834	/* Close this object */
835	memset(&close, 0, sizeof(close));
836	close.handle = bo_gem->gem_handle;
837	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
838	if (ret != 0) {
839		fprintf(stderr,
840			"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
841			bo_gem->gem_handle, bo_gem->name, strerror(errno));
842	}
843	free(bo);
844}
845
846/** Frees all cached buffers significantly older than @time. */
847static void
848drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
849{
850	int i;
851
852	if (bufmgr_gem->time == time)
853		return;
854
855	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
856		struct drm_intel_gem_bo_bucket *bucket =
857		    &bufmgr_gem->cache_bucket[i];
858
859		while (!DRMLISTEMPTY(&bucket->head)) {
860			drm_intel_bo_gem *bo_gem;
861
862			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
863					      bucket->head.next, head);
864			if (time - bo_gem->free_time <= 1)
865				break;
866
867			DRMLISTDEL(&bo_gem->head);
868
869			drm_intel_gem_bo_free(&bo_gem->bo);
870		}
871	}
872
873	bufmgr_gem->time = time;
874}
875
876static void
877drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
878{
879	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
880	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
881	struct drm_intel_gem_bo_bucket *bucket;
882	int i;
883
884	/* Unreference all the target buffers */
885	for (i = 0; i < bo_gem->reloc_count; i++) {
886		if (bo_gem->reloc_target_info[i].bo != bo) {
887			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
888								  reloc_target_info[i].bo,
889								  time);
890		}
891	}
892	bo_gem->reloc_count = 0;
893	bo_gem->used_as_reloc_target = 0;
894
895	DBG("bo_unreference final: %d (%s)\n",
896	    bo_gem->gem_handle, bo_gem->name);
897
898	/* release memory associated with this object */
899	if (bo_gem->reloc_target_info) {
900		free(bo_gem->reloc_target_info);
901		bo_gem->reloc_target_info = NULL;
902	}
903	if (bo_gem->relocs) {
904		free(bo_gem->relocs);
905		bo_gem->relocs = NULL;
906	}
907
908	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
909	/* Put the buffer into our internal cache for reuse if we can. */
910	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
911	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
912					      I915_MADV_DONTNEED)) {
913		bo_gem->free_time = time;
914
915		bo_gem->name = NULL;
916		bo_gem->validate_index = -1;
917
918		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
919	} else {
920		drm_intel_gem_bo_free(bo);
921	}
922}
923
924static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
925						      time_t time)
926{
927	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
928
929	assert(atomic_read(&bo_gem->refcount) > 0);
930	if (atomic_dec_and_test(&bo_gem->refcount))
931		drm_intel_gem_bo_unreference_final(bo, time);
932}
933
934static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
935{
936	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
937
938	assert(atomic_read(&bo_gem->refcount) > 0);
939	if (atomic_dec_and_test(&bo_gem->refcount)) {
940		drm_intel_bufmgr_gem *bufmgr_gem =
941		    (drm_intel_bufmgr_gem *) bo->bufmgr;
942		struct timespec time;
943
944		clock_gettime(CLOCK_MONOTONIC, &time);
945
946		pthread_mutex_lock(&bufmgr_gem->lock);
947		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
948		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
949		pthread_mutex_unlock(&bufmgr_gem->lock);
950	}
951}
952
953static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
954{
955	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
956	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
957	struct drm_i915_gem_set_domain set_domain;
958	int ret;
959
960	pthread_mutex_lock(&bufmgr_gem->lock);
961
962	/* Allow recursive mapping. Mesa may recursively map buffers with
963	 * nested display loops.
964	 */
965	if (!bo_gem->mem_virtual) {
966		struct drm_i915_gem_mmap mmap_arg;
967
968		DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);
969
970		memset(&mmap_arg, 0, sizeof(mmap_arg));
971		mmap_arg.handle = bo_gem->gem_handle;
972		mmap_arg.offset = 0;
973		mmap_arg.size = bo->size;
974		do {
975			ret = ioctl(bufmgr_gem->fd,
976				    DRM_IOCTL_I915_GEM_MMAP,
977				    &mmap_arg);
978		} while (ret == -1 && errno == EINTR);
979		if (ret != 0) {
980			ret = -errno;
981			fprintf(stderr,
982				"%s:%d: Error mapping buffer %d (%s): %s .\n",
983				__FILE__, __LINE__, bo_gem->gem_handle,
984				bo_gem->name, strerror(errno));
985			pthread_mutex_unlock(&bufmgr_gem->lock);
986			return ret;
987		}
988		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
989	}
990	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
991	    bo_gem->mem_virtual);
992	bo->virtual = bo_gem->mem_virtual;
993
994	set_domain.handle = bo_gem->gem_handle;
995	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
996	if (write_enable)
997		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
998	else
999		set_domain.write_domain = 0;
1000	do {
1001		ret = ioctl(bufmgr_gem->fd,
1002			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
1003			    &set_domain);
1004	} while (ret == -1 && errno == EINTR);
1005	if (ret != 0) {
1006		ret = -errno;
1007		fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
1008			__FILE__, __LINE__, bo_gem->gem_handle,
1009			strerror(errno));
1010		pthread_mutex_unlock(&bufmgr_gem->lock);
1011		return ret;
1012	}
1013
1014	pthread_mutex_unlock(&bufmgr_gem->lock);
1015
1016	return 0;
1017}
1018
1019int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1020{
1021	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1022	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1023	struct drm_i915_gem_set_domain set_domain;
1024	int ret;
1025
1026	pthread_mutex_lock(&bufmgr_gem->lock);
1027
1028	/* Get a mapping of the buffer if we haven't before. */
1029	if (bo_gem->gtt_virtual == NULL) {
1030		struct drm_i915_gem_mmap_gtt mmap_arg;
1031
1032		DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
1033		    bo_gem->name);
1034
1035		memset(&mmap_arg, 0, sizeof(mmap_arg));
1036		mmap_arg.handle = bo_gem->gem_handle;
1037
1038		/* Get the fake offset back... */
1039		do {
1040			ret = ioctl(bufmgr_gem->fd,
1041				    DRM_IOCTL_I915_GEM_MMAP_GTT,
1042				    &mmap_arg);
1043		} while (ret == -1 && errno == EINTR);
1044		if (ret != 0) {
1045			ret = -errno;
1046			fprintf(stderr,
1047				"%s:%d: Error preparing buffer map %d (%s): %s .\n",
1048				__FILE__, __LINE__,
1049				bo_gem->gem_handle, bo_gem->name,
1050				strerror(errno));
1051			pthread_mutex_unlock(&bufmgr_gem->lock);
1052			return ret;
1053		}
1054
1055		/* and mmap it */
1056		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
1057					   MAP_SHARED, bufmgr_gem->fd,
1058					   mmap_arg.offset);
1059		if (bo_gem->gtt_virtual == MAP_FAILED) {
1060			bo_gem->gtt_virtual = NULL;
1061			ret = -errno;
1062			fprintf(stderr,
1063				"%s:%d: Error mapping buffer %d (%s): %s .\n",
1064				__FILE__, __LINE__,
1065				bo_gem->gem_handle, bo_gem->name,
1066				strerror(errno));
1067			pthread_mutex_unlock(&bufmgr_gem->lock);
1068			return ret;
1069		}
1070	}
1071
1072	bo->virtual = bo_gem->gtt_virtual;
1073
1074	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1075	    bo_gem->gtt_virtual);
1076
1077	/* Now move it to the GTT domain so that the CPU caches are flushed */
1078	set_domain.handle = bo_gem->gem_handle;
1079	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1080	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1081	do {
1082		ret = ioctl(bufmgr_gem->fd,
1083			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
1084			    &set_domain);
1085	} while (ret == -1 && errno == EINTR);
1086
1087	if (ret != 0) {
1088		ret = -errno;
1089		fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
1090			__FILE__, __LINE__, bo_gem->gem_handle,
1091			strerror(errno));
1092	}
1093
1094	pthread_mutex_unlock(&bufmgr_gem->lock);
1095
1096	return ret;
1097}
1098
1099int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1100{
1101	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1102	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1103	int ret = 0;
1104
1105	if (bo == NULL)
1106		return 0;
1107
1108	assert(bo_gem->gtt_virtual != NULL);
1109
1110	pthread_mutex_lock(&bufmgr_gem->lock);
1111	bo->virtual = NULL;
1112	pthread_mutex_unlock(&bufmgr_gem->lock);
1113
1114	return ret;
1115}
1116
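/*
 * GTT-map sketch (illustrative, not in the original file): unlike the CPU
 * mapping above, a GTT mapping goes through the aperture and a fence
 * register, so the CPU can address a tiled buffer as if it were linear:
 *
 *	if (drm_intel_gem_bo_map_gtt(bo) == 0) {
 *		memcpy(bo->virtual, pixels, size);	(hypothetical source)
 *		drm_intel_gem_bo_unmap_gtt(bo);
 *	}
 */
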
1117static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1118{
1119	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1120	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1121	struct drm_i915_gem_sw_finish sw_finish;
1122	int ret;
1123
1124	if (bo == NULL)
1125		return 0;
1126
1127	assert(bo_gem->mem_virtual != NULL);
1128
1129	pthread_mutex_lock(&bufmgr_gem->lock);
1130
1131	/* Cause a flush to happen if the buffer's pinned for scanout, so the
1132	 * results show up in a timely manner.
1133	 */
1134	sw_finish.handle = bo_gem->gem_handle;
1135	do {
1136		ret = ioctl(bufmgr_gem->fd,
1137			    DRM_IOCTL_I915_GEM_SW_FINISH,
1138			    &sw_finish);
1139	} while (ret == -1 && errno == EINTR);
1140	ret = ret == -1 ? -errno : 0;
1141
1142	bo->virtual = NULL;
1143	pthread_mutex_unlock(&bufmgr_gem->lock);
1144
1145	return ret;
1146}
1147
1148static int
1149drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1150			 unsigned long size, const void *data)
1151{
1152	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1153	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1154	struct drm_i915_gem_pwrite pwrite;
1155	int ret;
1156
1157	memset(&pwrite, 0, sizeof(pwrite));
1158	pwrite.handle = bo_gem->gem_handle;
1159	pwrite.offset = offset;
1160	pwrite.size = size;
1161	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1162	do {
1163		ret = ioctl(bufmgr_gem->fd,
1164			    DRM_IOCTL_I915_GEM_PWRITE,
1165			    &pwrite);
1166	} while (ret == -1 && errno == EINTR);
1167	if (ret != 0) {
1168		ret = -errno;
1169		fprintf(stderr,
1170			"%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1171			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1172			(int)size, strerror(errno));
1173	}
1174
1175	return ret;
1176}
1177
1178static int
1179drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1180{
1181	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1182	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1183	int ret;
1184
1185	get_pipe_from_crtc_id.crtc_id = crtc_id;
1186	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1187		    &get_pipe_from_crtc_id);
1188	if (ret != 0) {
1189		/* We return -1 here to signal that we don't
1190		 * know which pipe is associated with this crtc.
1191		 * This lets the caller know that this information
1192		 * isn't available; using the wrong pipe for
1193		 * vblank waiting can cause the chipset to lock up.
1194		 */
1195		return -1;
1196	}
1197
1198	return get_pipe_from_crtc_id.pipe;
1199}
1200
1201static int
1202drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1203			     unsigned long size, void *data)
1204{
1205	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1206	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1207	struct drm_i915_gem_pread pread;
1208	int ret;
1209
1210	memset(&pread, 0, sizeof(pread));
1211	pread.handle = bo_gem->gem_handle;
1212	pread.offset = offset;
1213	pread.size = size;
1214	pread.data_ptr = (uint64_t) (uintptr_t) data;
1215	do {
1216		ret = ioctl(bufmgr_gem->fd,
1217			    DRM_IOCTL_I915_GEM_PREAD,
1218			    &pread);
1219	} while (ret == -1 && errno == EINTR);
1220	if (ret != 0) {
1221		ret = -errno;
1222		fprintf(stderr,
1223			"%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1224			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1225			(int)size, strerror(errno));
1226	}
1227
1228	return ret;
1229}
1230
1231/** Waits for all GPU rendering to the object to have completed. */
1232static void
1233drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1234{
1235	drm_intel_gem_bo_start_gtt_access(bo, 0);
1236}
1237
1238/**
1239 * Sets the object to the GTT read and possibly write domain, used by the X
1240 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1241 *
1242 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1243 * can do tiled pixmaps this way.
1244 */
1245void
1246drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1247{
1248	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1249	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1250	struct drm_i915_gem_set_domain set_domain;
1251	int ret;
1252
1253	set_domain.handle = bo_gem->gem_handle;
1254	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1255	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1256	do {
1257		ret = ioctl(bufmgr_gem->fd,
1258			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
1259			    &set_domain);
1260	} while (ret == -1 && errno == EINTR);
1261	if (ret != 0) {
1262		fprintf(stderr,
1263			"%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1264			__FILE__, __LINE__, bo_gem->gem_handle,
1265			set_domain.read_domains, set_domain.write_domain,
1266			strerror(errno));
1267	}
1268}
1269
1270static void
1271drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1272{
1273	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1274	int i;
1275
1276	free(bufmgr_gem->exec2_objects);
1277	free(bufmgr_gem->exec_objects);
1278	free(bufmgr_gem->exec_bos);
1279
1280	pthread_mutex_destroy(&bufmgr_gem->lock);
1281
1282	/* Free any cached buffer objects we were going to reuse */
1283	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1284		struct drm_intel_gem_bo_bucket *bucket =
1285		    &bufmgr_gem->cache_bucket[i];
1286		drm_intel_bo_gem *bo_gem;
1287
1288		while (!DRMLISTEMPTY(&bucket->head)) {
1289			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1290					      bucket->head.next, head);
1291			DRMLISTDEL(&bo_gem->head);
1292
1293			drm_intel_gem_bo_free(&bo_gem->bo);
1294		}
1295	}
1296
1297	free(bufmgr);
1298}
1299
1300/**
1301 * Adds the target buffer to the validation list and adds the relocation
1302 * to the reloc_buffer's relocation list.
1303 *
1304 * The relocation entry at the given offset must already contain the
1305 * precomputed relocation value, because the kernel will optimize out
1306 * the relocation entry write when the buffer hasn't moved from the
1307 * last known offset in target_bo.
1308 */
1309static int
1310do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1311		 drm_intel_bo *target_bo, uint32_t target_offset,
1312		 uint32_t read_domains, uint32_t write_domain,
1313		 int need_fence)
1314{
1315	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1316	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1317	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1318
1319	if (bo_gem->has_error)
1320		return -ENOMEM;
1321
1322	if (target_bo_gem->has_error) {
1323		bo_gem->has_error = 1;
1324		return -ENOMEM;
1325	}
1326
1327	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1328		need_fence = 0;
1329
1330	/* We never use HW fences for rendering on 965+ */
1331	if (bufmgr_gem->gen >= 4)
1332		need_fence = 0;
1333
1334	/* Create a new relocation list if needed */
1335	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1336		return -ENOMEM;
1337
1338	/* Check overflow */
1339	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1340
1341	/* Check args */
1342	assert(offset <= bo->size - 4);
1343	assert((write_domain & (write_domain - 1)) == 0);
1344
1345	/* Make sure that we're not adding a reloc to something whose size has
1346	 * already been accounted for.
1347	 */
1348	assert(!bo_gem->used_as_reloc_target);
1349	if (target_bo_gem != bo_gem) {
1350		target_bo_gem->used_as_reloc_target = 1;
1351		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1352	}
1353	/* An object needing a fence is a tiled buffer, so it won't have
1354	 * relocs to other buffers.
1355	 */
1356	if (need_fence)
1357		target_bo_gem->reloc_tree_fences = 1;
1358	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
1359
1360	/* Flag the target to disallow further relocations in it. */
1361
1362	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1363	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1364	bo_gem->relocs[bo_gem->reloc_count].target_handle =
1365	    target_bo_gem->gem_handle;
1366	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1367	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1368	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
1369
1370	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1371	if (target_bo != bo)
1372		drm_intel_gem_bo_reference(target_bo);
1373	if (need_fence)
1374		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
1375			DRM_INTEL_RELOC_FENCE;
1376	else
1377		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
1378
1379	bo_gem->reloc_count++;
1380
1381	return 0;
1382}
1383
1384static int
1385drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1386			    drm_intel_bo *target_bo, uint32_t target_offset,
1387			    uint32_t read_domains, uint32_t write_domain)
1388{
1389	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1390
1391	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1392				read_domains, write_domain,
1393				!bufmgr_gem->fenced_relocs);
1394}
1395
1396static int
1397drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
1398				  drm_intel_bo *target_bo,
1399				  uint32_t target_offset,
1400				  uint32_t read_domains, uint32_t write_domain)
1401{
1402	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1403				read_domains, write_domain, 1);
1404}
1405
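/*
 * Relocation sketch (illustrative, not in the original file): per the comment
 * above do_bo_emit_reloc(), the caller writes the presumed address into the
 * batch itself and records the reloc, typically through the
 * drm_intel_bo_emit_reloc() wrapper:
 *
 *	uint32_t *batch_map = batch_bo->virtual;	(hypothetical names)
 *	drm_intel_bo_emit_reloc(batch_bo, batch_used,
 *				target_bo, delta,
 *				I915_GEM_DOMAIN_RENDER,
 *				I915_GEM_DOMAIN_RENDER);
 *	batch_map[batch_used / 4] = target_bo->offset + delta;
 *	batch_used += 4;
 */
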
1406/**
1407 * Walk the tree of relocations rooted at BO and accumulate the list of
1408 * validations to be performed and update the relocation buffers with
1409 * index values into the validation list.
1410 */
1411static void
1412drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
1413{
1414	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1415	int i;
1416
1417	if (bo_gem->relocs == NULL)
1418		return;
1419
1420	for (i = 0; i < bo_gem->reloc_count; i++) {
1421		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1422
1423		if (target_bo == bo)
1424			continue;
1425
1426		/* Continue walking the tree depth-first. */
1427		drm_intel_gem_bo_process_reloc(target_bo);
1428
1429		/* Add the target to the validate list */
1430		drm_intel_add_validate_buffer(target_bo);
1431	}
1432}
1433
1434static void
1435drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
1436{
1437	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1438	int i;
1439
1440	if (bo_gem->relocs == NULL)
1441		return;
1442
1443	for (i = 0; i < bo_gem->reloc_count; i++) {
1444		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1445		int need_fence;
1446
1447		if (target_bo == bo)
1448			continue;
1449
1450		/* Continue walking the tree depth-first. */
1451		drm_intel_gem_bo_process_reloc2(target_bo);
1452
1453		need_fence = (bo_gem->reloc_target_info[i].flags &
1454			      DRM_INTEL_RELOC_FENCE);
1455
1456		/* Add the target to the validate list */
1457		drm_intel_add_validate_buffer2(target_bo, need_fence);
1458	}
1459}
1460
1461
1462static void
1463drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
1464{
1465	int i;
1466
1467	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1468		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1469		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1470
1471		/* Update the buffer offset */
1472		if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
1473			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1474			    bo_gem->gem_handle, bo_gem->name, bo->offset,
1475			    (unsigned long long)bufmgr_gem->exec_objects[i].
1476			    offset);
1477			bo->offset = bufmgr_gem->exec_objects[i].offset;
1478		}
1479	}
1480}
1481
1482static void
1483drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
1484{
1485	int i;
1486
1487	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1488		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1489		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1490
1491		/* Update the buffer offset */
1492		if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
1493			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1494			    bo_gem->gem_handle, bo_gem->name, bo->offset,
1495			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
1496			bo->offset = bufmgr_gem->exec2_objects[i].offset;
1497		}
1498	}
1499}
1500
1501static int
1502drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
1503		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
1504{
1505	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1506	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1507	struct drm_i915_gem_execbuffer execbuf;
1508	int ret, i;
1509
1510	if (bo_gem->has_error)
1511		return -ENOMEM;
1512
1513	pthread_mutex_lock(&bufmgr_gem->lock);
1514	/* Update indices and set up the validate list. */
1515	drm_intel_gem_bo_process_reloc(bo);
1516
1517	/* Add the batch buffer to the validation list.  There are no
1518	 * relocations pointing to it.
1519	 */
1520	drm_intel_add_validate_buffer(bo);
1521
1522	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
1523	execbuf.buffer_count = bufmgr_gem->exec_count;
1524	execbuf.batch_start_offset = 0;
1525	execbuf.batch_len = used;
1526	execbuf.cliprects_ptr = (uintptr_t) cliprects;
1527	execbuf.num_cliprects = num_cliprects;
1528	execbuf.DR1 = 0;
1529	execbuf.DR4 = DR4;
1530
1531	do {
1532		ret = ioctl(bufmgr_gem->fd,
1533			    DRM_IOCTL_I915_GEM_EXECBUFFER,
1534			    &execbuf);
1535	} while (ret != 0 && errno == EINTR);
1536
1537	if (ret != 0) {
1538		ret = -errno;
1539		if (errno == ENOSPC) {
1540			fprintf(stderr,
1541				"Execbuffer fails to pin. "
1542				"Estimate: %u. Actual: %u. Available: %u\n",
1543				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1544								   bufmgr_gem->
1545								   exec_count),
1546				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1547								  bufmgr_gem->
1548								  exec_count),
1549				(unsigned int)bufmgr_gem->gtt_size);
1550		}
1551	}
1552	drm_intel_update_buffer_offsets(bufmgr_gem);
1553
1554	if (bufmgr_gem->bufmgr.debug)
1555		drm_intel_gem_dump_validation_list(bufmgr_gem);
1556
1557	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1558		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1559		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1560
1561		/* Disconnect the buffer from the validate list */
1562		bo_gem->validate_index = -1;
1563		bufmgr_gem->exec_bos[i] = NULL;
1564	}
1565	bufmgr_gem->exec_count = 0;
1566	pthread_mutex_unlock(&bufmgr_gem->lock);
1567
1568	return ret;
1569}
1570
1571static int
1572drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
1573			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
1574			int ring_flag)
1575{
1576	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1577	struct drm_i915_gem_execbuffer2 execbuf;
1578	int ret, i;
1579
1580	if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD))
1581		return -EINVAL;
1582
1583	pthread_mutex_lock(&bufmgr_gem->lock);
1584	/* Update indices and set up the validate list. */
1585	drm_intel_gem_bo_process_reloc2(bo);
1586
1587	/* Add the batch buffer to the validation list.  There are no relocations
1588	 * pointing to it.
1589	 */
1590	drm_intel_add_validate_buffer2(bo, 0);
1591
1592	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
1593	execbuf.buffer_count = bufmgr_gem->exec_count;
1594	execbuf.batch_start_offset = 0;
1595	execbuf.batch_len = used;
1596	execbuf.cliprects_ptr = (uintptr_t)cliprects;
1597	execbuf.num_cliprects = num_cliprects;
1598	execbuf.DR1 = 0;
1599	execbuf.DR4 = DR4;
1600	execbuf.flags = ring_flag;
1601	execbuf.rsvd1 = 0;
1602	execbuf.rsvd2 = 0;
1603
1604	do {
1605		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
1606			    &execbuf);
1607	} while (ret != 0 && errno == EINTR);
1608
1609	if (ret != 0) {
1610		ret = -errno;
1611		if (ret == -ENOSPC) {
1612			fprintf(stderr,
1613				"Execbuffer fails to pin. "
1614				"Estimate: %u. Actual: %u. Available: %u\n",
1615				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1616								   bufmgr_gem->exec_count),
1617				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1618								  bufmgr_gem->exec_count),
1619				(unsigned int) bufmgr_gem->gtt_size);
1620		}
1621	}
1622	drm_intel_update_buffer_offsets2(bufmgr_gem);
1623
1624	if (bufmgr_gem->bufmgr.debug)
1625		drm_intel_gem_dump_validation_list(bufmgr_gem);
1626
1627	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1628		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1629		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1630
1631		/* Disconnect the buffer from the validate list */
1632		bo_gem->validate_index = -1;
1633		bufmgr_gem->exec_bos[i] = NULL;
1634	}
1635	bufmgr_gem->exec_count = 0;
1636	pthread_mutex_unlock(&bufmgr_gem->lock);
1637
1638	return ret;
1639}
1640
1641static int
1642drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
1643		       drm_clip_rect_t *cliprects, int num_cliprects,
1644		       int DR4)
1645{
1646	return drm_intel_gem_bo_mrb_exec2(bo, used,
1647					cliprects, num_cliprects, DR4,
1648					I915_EXEC_RENDER);
1649}
1650
1651static int
1652drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
1653{
1654	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1655	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1656	struct drm_i915_gem_pin pin;
1657	int ret;
1658
1659	memset(&pin, 0, sizeof(pin));
1660	pin.handle = bo_gem->gem_handle;
1661	pin.alignment = alignment;
1662
1663	do {
1664		ret = ioctl(bufmgr_gem->fd,
1665			    DRM_IOCTL_I915_GEM_PIN,
1666			    &pin);
1667	} while (ret == -1 && errno == EINTR);
1668
1669	if (ret != 0)
1670		return -errno;
1671
1672	bo->offset = pin.offset;
1673	return 0;
1674}
1675
1676static int
1677drm_intel_gem_bo_unpin(drm_intel_bo *bo)
1678{
1679	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1680	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1681	struct drm_i915_gem_unpin unpin;
1682	int ret;
1683
1684	memset(&unpin, 0, sizeof(unpin));
1685	unpin.handle = bo_gem->gem_handle;
1686
1687	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
1688	if (ret != 0)
1689		return -errno;
1690
1691	return 0;
1692}
1693
1694static int
1695drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
1696				     uint32_t tiling_mode,
1697				     uint32_t stride)
1698{
1699	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1700	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1701	struct drm_i915_gem_set_tiling set_tiling;
1702	int ret;
1703
1704	if (tiling_mode == bo_gem->tiling_mode &&
1705	    stride == bo_gem->stride)
1706		return 0;
1707
1708	memset(&set_tiling, 0, sizeof(set_tiling));
1709	do {
1710		set_tiling.handle = bo_gem->gem_handle;
1711		set_tiling.tiling_mode = tiling_mode;
1712		set_tiling.stride = stride;
1713
1714		ret = ioctl(bufmgr_gem->fd,
1715			    DRM_IOCTL_I915_GEM_SET_TILING,
1716			    &set_tiling);
1717	} while (ret == -1 && errno == EINTR);
1718	if (ret == -1)
1719		return -errno;
1720
1721	bo_gem->tiling_mode = set_tiling.tiling_mode;
1722	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1723	bo_gem->stride = stride;
1724	return 0;
1725}
1726
1727static int
1728drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
1729			    uint32_t stride)
1730{
1731	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1732	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1733	int ret;
1734
1735	if (bo_gem->global_name == 0)
1736		return 0;
1737
1738	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
1739	if (ret == 0)
1740		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
1741
1742	*tiling_mode = bo_gem->tiling_mode;
1743	return ret;
1744}
1745
1746static int
1747drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
1748			    uint32_t * swizzle_mode)
1749{
1750	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1751
1752	*tiling_mode = bo_gem->tiling_mode;
1753	*swizzle_mode = bo_gem->swizzle_mode;
1754	return 0;
1755}
1756
1757static int
1758drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
1759{
1760	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1761	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1762	struct drm_gem_flink flink;
1763	int ret;
1764
1765	if (!bo_gem->global_name) {
1766		memset(&flink, 0, sizeof(flink));
1767		flink.handle = bo_gem->gem_handle;
1768
1769		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
1770		if (ret != 0)
1771			return -errno;
1772		bo_gem->global_name = flink.name;
1773		bo_gem->reusable = 0;
1774	}
1775
1776	*name = bo_gem->global_name;
1777	return 0;
1778}
1779
1780/**
1781 * Enables unlimited caching of buffer objects for reuse.
1782 *
1783 * This is potentially very memory expensive, as the cache at each bucket
1784 * size is only bounded by how many buffers of that size we've managed to have
1785 * in flight at once.
1786 */
1787void
1788drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
1789{
1790	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1791
1792	bufmgr_gem->bo_reuse = 1;
1793}
1794
1795/**
1796 * Enable use of fenced reloc type.
1797 *
1798 * New code should enable this to avoid unnecessary fence register
1799 * allocation.  If this option is not enabled, all relocs will have a
1800 * fence register allocated.
1801 */
1802void
1803drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
1804{
1805	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
1806
1807	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
1808		bufmgr_gem->fenced_relocs = 1;
1809}
1810
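/*
 * Setup sketch (illustrative, not in the original file): both knobs above are
 * normally flipped right after the buffer manager is created, e.g.:
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
 */
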
1811/**
1812 * Return the additional aperture space required by the tree of buffer objects
1813 * rooted at bo.
1814 */
1815static int
1816drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
1817{
1818	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1819	int i;
1820	int total = 0;
1821
1822	if (bo == NULL || bo_gem->included_in_check_aperture)
1823		return 0;
1824
1825	total += bo->size;
1826	bo_gem->included_in_check_aperture = 1;
1827
1828	for (i = 0; i < bo_gem->reloc_count; i++)
1829		total +=
1830		    drm_intel_gem_bo_get_aperture_space(bo_gem->
1831							reloc_target_info[i].bo);
1832
1833	return total;
1834}
1835
1836/**
1837 * Count the number of buffers in this list that need a fence reg
1838 *
1839 * If the count is greater than the number of available regs, we'll have
1840 * to ask the caller to resubmit a batch with fewer tiled buffers.
1841 *
1842 * This function over-counts if the same buffer is used multiple times.
1843 */
1844static unsigned int
1845drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
1846{
1847	int i;
1848	unsigned int total = 0;
1849
1850	for (i = 0; i < count; i++) {
1851		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
1852
1853		if (bo_gem == NULL)
1854			continue;
1855
1856		total += bo_gem->reloc_tree_fences;
1857	}
1858	return total;
1859}
1860
1861/**
1862 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
1863 * for the next drm_intel_bufmgr_check_aperture_space() call.
1864 */
1865static void
1866drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
1867{
1868	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1869	int i;
1870
1871	if (bo == NULL || !bo_gem->included_in_check_aperture)
1872		return;
1873
1874	bo_gem->included_in_check_aperture = 0;
1875
1876	for (i = 0; i < bo_gem->reloc_count; i++)
1877		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
1878							   reloc_target_info[i].bo);
1879}
1880
1881/**
1882 * Return a conservative estimate for the amount of aperture required
1883 * for a collection of buffers. This may double-count some buffers.
1884 */
1885static unsigned int
1886drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
1887{
1888	int i;
1889	unsigned int total = 0;
1890
1891	for (i = 0; i < count; i++) {
1892		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
1893		if (bo_gem != NULL)
1894			total += bo_gem->reloc_tree_size;
1895	}
1896	return total;
1897}
1898
1899/**
1900 * Return the amount of aperture needed for a collection of buffers.
1901 * This avoids double counting any buffers, at the cost of looking
1902 * at every buffer in the set.
1903 */
1904static unsigned int
1905drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
1906{
1907	int i;
1908	unsigned int total = 0;
1909
1910	for (i = 0; i < count; i++) {
1911		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
1912		/* For the first buffer object in the array, we get an
1913		 * accurate count back for its reloc_tree size (since nothing
1914		 * had been flagged as being counted yet).  We can save that
1915		 * value out as a more conservative reloc_tree_size that
1916		 * avoids double-counting target buffers.  Since the first
1917		 * buffer happens to usually be the batch buffer in our
1918		 * callers, this can pull us back from doing the tree
1919		 * walk on every new batch emit.
1920		 */
1921		if (i == 0) {
1922			drm_intel_bo_gem *bo_gem =
1923			    (drm_intel_bo_gem *) bo_array[i];
1924			bo_gem->reloc_tree_size = total;
1925		}
1926	}
1927
1928	for (i = 0; i < count; i++)
1929		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
1930	return total;
1931}
1932
1933/**
1934 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
1935 * emit rendering referencing the buffers pointed to by bo_array.
1936 *
1937 * This is required because if we try to emit a batchbuffer with relocations
1938 * to a tree of buffers that won't simultaneously fit in the aperture,
1939 * the rendering will return an error at a point where the software is not
1940 * prepared to recover from it.
1941 *
1942 * However, we also want to emit the batchbuffer significantly before we reach
1943 * the limit, because a series of batchbuffers that each reference buffers
1944 * covering almost all of the aperture means that every emit ends up waiting
1945 * for buffers from the previous rendering to be evicted, making rendering
1946 * effectively synchronous.  By emitting smaller batchbuffers, we trade some
1947 * CPU overhead for better CPU/GPU parallelism.
1948 */
1949static int
1950drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
1951{
1952	drm_intel_bufmgr_gem *bufmgr_gem =
1953	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
1954	unsigned int total = 0;
1955	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
1956	int total_fences;
1957
1958	/* Check for fence reg constraints if necessary */
1959	if (bufmgr_gem->available_fences) {
1960		total_fences = drm_intel_gem_total_fences(bo_array, count);
1961		if (total_fences > bufmgr_gem->available_fences)
1962			return -ENOSPC;
1963	}
1964
1965	total = drm_intel_gem_estimate_batch_space(bo_array, count);
1966
1967	if (total > threshold)
1968		total = drm_intel_gem_compute_batch_space(bo_array, count);
1969
1970	if (total > threshold) {
1971		DBG("check_space: overflowed available aperture, "
1972		    "%dkb vs %dkb\n",
1973		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
1974		return -ENOSPC;
1975	} else {
1976		DBG("drm_check_space: total %dkb vs aperture %dkb\n", total / 1024,
1977		    (int)bufmgr_gem->gtt_size / 1024);
1978		return 0;
1979	}
1980}
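
/*
 * Usage sketch: this is reached through the public
 * drm_intel_bufmgr_check_aperture_space() wrapper; a typical caller checks
 * the buffers it is about to reference and flushes the current batch when
 * -ENOSPC comes back (flush_current_batch() below is a hypothetical
 * caller-side helper):
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bo_array, count) != 0) {
 *		flush_current_batch();
 *		... re-emit state against the new batchbuffer ...
 *	}
 */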
1981
1982/*
1983 * Disable buffer reuse for objects which are shared with the kernel
1984 * as scanout buffers
1985 */
1986static int
1987drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
1988{
1989	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1990
1991	bo_gem->reusable = 0;
1992	return 0;
1993}
1994
1995static int
1996drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
1997{
1998	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1999
2000	return bo_gem->reusable;
2001}
2002
2003static int
2004_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2005{
2006	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2007	int i;
2008
2009	for (i = 0; i < bo_gem->reloc_count; i++) {
2010		if (bo_gem->reloc_target_info[i].bo == target_bo)
2011			return 1;
2012		if (bo == bo_gem->reloc_target_info[i].bo)
2013			continue;
2014		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2015						target_bo))
2016			return 1;
2017	}
2018
2019	return 0;
2020}
2021
2022/** Return true if target_bo is referenced by bo's relocation tree. */
2023static int
2024drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2025{
2026	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2027
2028	if (bo == NULL || target_bo == NULL)
2029		return 0;
2030	if (target_bo_gem->used_as_reloc_target)
2031		return _drm_intel_gem_bo_references(bo, target_bo);
2032	return 0;
2033}
2034
2035static void
2036add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
2037{
2038	unsigned int i = bufmgr_gem->num_buckets;
2039
2040	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
2041
2042	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
2043	bufmgr_gem->cache_bucket[i].size = size;
2044	bufmgr_gem->num_buckets++;
2045}
2046
2047static void
2048init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
2049{
2050	unsigned long size, cache_max_size = 64 * 1024 * 1024;
2051
2052	/* Power-of-two buckets alone proved too wasteful of memory, so
2053	 * give three additional sizes between each power of two, to
2054	 * hopefully match allocation sizes closely enough.  (The
2055	 * alternative would be exact size matching, counting on tiled
2056	 * width/height alignment and the rounding of sizes to whole
2057	 * pages to still give useful cache hit rates for things like
2058	 * composited window resizes.)
2059	 */
2060	add_bucket(bufmgr_gem, 4096);
2061	add_bucket(bufmgr_gem, 4096 * 2);
2062	add_bucket(bufmgr_gem, 4096 * 3);
2063
2064	/* Initialize the linked lists for BO reuse cache. */
2065	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2066		add_bucket(bufmgr_gem, size);
2067
2068		add_bucket(bufmgr_gem, size + size * 1 / 4);
2069		add_bucket(bufmgr_gem, size + size * 2 / 4);
2070		add_bucket(bufmgr_gem, size + size * 3 / 4);
2071	}
2072}
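
/*
 * With the constants above the buckets come out as 4KB, 8KB and 12KB,
 * then each power of two from 16KB up to 64MB followed by its 1.25x,
 * 1.5x and 1.75x sizes (16KB, 20KB, 24KB, 28KB, 32KB, 40KB, ...), for a
 * total of 3 + 13 * 4 = 55 buckets, which fits in cache_bucket[14 * 4].
 */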
2073
2074/**
2075 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2076 * and manage buffer objects.
2077 *
2078 * \param fd File descriptor of the opened DRM device.
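 * \param batch_size Size in bytes of the batchbuffers the caller will use;
 *	this only determines how many relocations are allowed per buffer
 *	(see max_relocs below).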
2079 */
2080drm_intel_bufmgr *
2081drm_intel_bufmgr_gem_init(int fd, int batch_size)
2082{
2083	drm_intel_bufmgr_gem *bufmgr_gem;
2084	struct drm_i915_gem_get_aperture aperture;
2085	drm_i915_getparam_t gp;
2086	int ret;
2087	int exec2 = 0, has_bsd = 0;
2088
2089	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
2090	if (bufmgr_gem == NULL)
2091		return NULL;
2092
2093	bufmgr_gem->fd = fd;
2094
2095	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
2096		free(bufmgr_gem);
2097		return NULL;
2098	}
2099
2100	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
2101
2102	if (ret == 0)
2103		bufmgr_gem->gtt_size = aperture.aper_available_size;
2104	else {
2105		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
2106			strerror(errno));
2107		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
2108		fprintf(stderr, "Assuming %dkB available aperture size.\n"
2109			"May lead to reduced performance or incorrect "
2110			"rendering.\n",
2111			(int)bufmgr_gem->gtt_size / 1024);
2112	}
2113
2114	gp.param = I915_PARAM_CHIPSET_ID;
2115	gp.value = &bufmgr_gem->pci_device;
2116	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2117	if (ret) {
2118		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
2119		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
2120	}
2121
2122	if (IS_GEN2(bufmgr_gem))
2123		bufmgr_gem->gen = 2;
2124	else if (IS_GEN3(bufmgr_gem))
2125		bufmgr_gem->gen = 3;
2126	else if (IS_GEN4(bufmgr_gem))
2127		bufmgr_gem->gen = 4;
2128	else
2129		bufmgr_gem->gen = 6;
2130
	/* Repoint gp.value before reusing gp for the feature probes below,
	 * so the kernel's replies don't overwrite the chipset id that was
	 * just stored in bufmgr_gem->pci_device.
	 */
2131	gp.param = I915_PARAM_HAS_EXECBUF2;
	gp.value = &exec2;
2132	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2133	if (!ret)
2134		exec2 = 1;
2135
2136	gp.param = I915_PARAM_HAS_BSD;
	gp.value = &has_bsd;
2137	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2138	if (!ret)
2139		has_bsd = 1;
2140
2141	if (bufmgr_gem->gen < 4) {
2142		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
2143		gp.value = &bufmgr_gem->available_fences;
2144		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2145		if (ret) {
2146			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
2147				errno);
2148			fprintf(stderr, "param: %d, val: %d\n", gp.param,
2149				*gp.value);
2150			bufmgr_gem->available_fences = 0;
2151		} else {
2152			/* XXX The kernel reports the total number of fences,
2153			 * including any that may be pinned.
2154			 *
2155			 * We presume that there will be at least one pinned
2156			 * fence for the scanout buffer, but there may be more
2157			 * than one scanout and the user may be manually
2158			 * pinning buffers. Let's move to execbuffer2 and
2159			 * thereby forget the insanity of using fences...
2160			 */
2161			bufmgr_gem->available_fences -= 2;
2162			if (bufmgr_gem->available_fences < 0)
2163				bufmgr_gem->available_fences = 0;
2164		}
2165	}
2166
2167	/* Let's go with one relocation per every 2 dwords (but round down a bit
2168	 * since a power of two will mean an extra page allocation for the reloc
2169	 * buffer).
2170	 *
2171	 * Every 4 was too few for the blender benchmark.
2172	 */
2173	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
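	/* Worked example: with a 16KB batch this comes to
	 * 16384 / 4 / 2 - 2 = 2046 relocations per buffer.
	 */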
2174
2175	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
2176	bufmgr_gem->bufmgr.bo_alloc_for_render =
2177	    drm_intel_gem_bo_alloc_for_render;
2178	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
2179	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
2180	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
2181	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
2182	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
2183	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
2184	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
2185	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
2186	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
2187	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
2188	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
2189	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
2190	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
2191	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
2192	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
2193	/* Use the new one if available */
2194	if (exec2) {
2195		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
2196		if (has_bsd)
2197			bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
2198	} else
2199		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
2200	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
2201	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
2202	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
2203	bufmgr_gem->bufmgr.debug = 0;
2204	bufmgr_gem->bufmgr.check_aperture_space =
2205	    drm_intel_gem_check_aperture_space;
2206	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
2207	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
2208	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
2209	    drm_intel_gem_get_pipe_from_crtc_id;
2210	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
2211
2212	init_cache_buckets(bufmgr_gem);
2213
2214	return &bufmgr_gem->bufmgr;
2215}
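
/*
 * Bring-up sketch (not part of this file): a minimal client of this buffer
 * manager, assuming the DRM device is opened with drmOpen() rather than an
 * fd passed in by the X server, and a 16KB batch size:
 *
 *	int fd = drmOpen("i915", NULL);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *
 *	if (bufmgr == NULL)
 *		abort();
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *	... map the buffer, build and execute batches referencing it ...
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */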
2216