intel_bufmgr_gem.c revision 056aa9be04e923a45ca00924caa587d6dbf8821d
1/**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#ifdef HAVE_CONFIG_H
38#include "config.h"
39#endif
40
41#include <xf86drm.h>
42#include <xf86atomic.h>
43#include <fcntl.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <assert.h>
49#include <pthread.h>
50#include <sys/ioctl.h>
51#include <sys/mman.h>
52#include <sys/stat.h>
53#include <sys/types.h>
54
55#include <errno.h>
56#include "libdrm_lists.h"
57#include "intel_bufmgr.h"
58#include "intel_bufmgr_priv.h"
59#include "intel_chipset.h"
61
62#include "i915_drm.h"
63
64#define DBG(...) do {					\
65	if (bufmgr_gem->bufmgr.debug)			\
66		fprintf(stderr, __VA_ARGS__);		\
67} while (0)
68
69#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
70
71typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
72
73struct drm_intel_gem_bo_bucket {
74	drmMMListHead head;
75	unsigned long size;
76};
77
78typedef struct _drm_intel_bufmgr_gem {
79	drm_intel_bufmgr bufmgr;
80
81	int fd;
82
83	int max_relocs;
84
85	pthread_mutex_t lock;
86
87	struct drm_i915_gem_exec_object *exec_objects;
88	struct drm_i915_gem_exec_object2 *exec2_objects;
89	drm_intel_bo **exec_bos;
90	int exec_size;
91	int exec_count;
92
93	/** Array of lists of cached gem objects of power-of-two sizes */
94	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
95	int num_buckets;
96
97	uint64_t gtt_size;
98	int available_fences;
99	int pci_device;
100	int gen;
101	char bo_reuse;
102	char fenced_relocs;
103} drm_intel_bufmgr_gem;
104
105#define DRM_INTEL_RELOC_FENCE (1<<0)
106
107typedef struct _drm_intel_reloc_target_info {
108	drm_intel_bo *bo;
109	int flags;
110} drm_intel_reloc_target;
111
112struct _drm_intel_bo_gem {
113	drm_intel_bo bo;
114
115	atomic_t refcount;
116	uint32_t gem_handle;
117	const char *name;
118
119	/**
120	 * Kernel-assigned global name for this object
121	 */
122	unsigned int global_name;
123
124	/**
125	 * Index of the buffer within the validation list while preparing a
126	 * batchbuffer execution.
127	 */
128	int validate_index;
129
130	/**
131	 * Current tiling mode
132	 */
133	uint32_t tiling_mode;
134	uint32_t swizzle_mode;
135	unsigned long stride;
136
137	time_t free_time;
138
139	/** Array passed to the DRM containing relocation information. */
140	struct drm_i915_gem_relocation_entry *relocs;
141	/**
142	 * Array of info structs corresponding to relocs[i].target_handle etc
143	 */
144	drm_intel_reloc_target *reloc_target_info;
145	/** Number of entries in relocs */
146	int reloc_count;
147	/** Mapped address for the buffer, saved across map/unmap cycles */
148	void *mem_virtual;
149	/** GTT virtual address for the buffer, saved across map/unmap cycles */
150	void *gtt_virtual;
151
152	/** BO cache list */
153	drmMMListHead head;
154
155	/**
156	 * Boolean of whether this BO and its children have been included in
157	 * the current drm_intel_bufmgr_check_aperture_space() total.
158	 */
159	char included_in_check_aperture;
160
161	/**
162	 * Boolean of whether this buffer has been used as a relocation
163	 * target and had its size accounted for, and thus can't have any
164	 * further relocations added to it.
165	 */
166	char used_as_reloc_target;
167
168	/**
169	 * Boolean of whether we have encountered an error whilst building the relocation tree.
170	 */
171	char has_error;
172
173	/**
174	 * Boolean of whether this buffer can be re-used
175	 */
176	char reusable;
177
178	/**
179	 * Size in bytes of this buffer and its relocation descendants.
180	 *
181	 * Used to avoid costly tree walking in
182	 * drm_intel_bufmgr_check_aperture in the common case.
183	 */
184	int reloc_tree_size;
185
186	/**
187	 * Number of potential fence registers required by this buffer and its
188	 * relocations.
189	 */
190	int reloc_tree_fences;
191};
192
193static unsigned int
194drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
195
196static unsigned int
197drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
198
199static int
200drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
201			    uint32_t * swizzle_mode);
202
203static int
204drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
205			    uint32_t stride);
206
207static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
208						      time_t time);
209
210static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
211
212static void drm_intel_gem_bo_free(drm_intel_bo *bo);
213
214static unsigned long
215drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
216			   uint32_t *tiling_mode)
217{
218	unsigned long min_size, max_size;
219	unsigned long i;
220
221	if (*tiling_mode == I915_TILING_NONE)
222		return size;
223
224	/* 965+ just need multiples of page size for tiling */
225	if (bufmgr_gem->gen >= 4)
226		return ROUND_UP_TO(size, 4096);
227
228	/* Older chips need powers of two, of at least 512k or 1M */
229	if (bufmgr_gem->gen == 3) {
230		min_size = 1024*1024;
231		max_size = 128*1024*1024;
232	} else {
233		min_size = 512*1024;
234		max_size = 64*1024*1024;
235	}
236
237	if (size > max_size) {
238		*tiling_mode = I915_TILING_NONE;
239		return size;
240	}
241
242	for (i = min_size; i < size; i <<= 1)
243		;
244
245	return i;
246}
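
/*
 * Worked example (illustrative, values chosen for this sketch): asking for
 * a 1,000,000-byte X-tiled object rounds up to the next page multiple on
 * gen4+, but to the next power of two of at least 1 MiB on gen3:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long sz;
 *
 *	sz = drm_intel_gem_bo_tile_size(bufmgr_gem, 1000000, &tiling);
 *	// gen4+: sz == ROUND_UP_TO(1000000, 4096) == 1003520
 *	// gen3:  sz == 1048576 (smallest power of two >= 1 MiB and >= size)
 */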
247
248/*
249 * Round a given pitch up to the minimum required for X tiling on a
250 * given chip.  We use 512 as the minimum to allow for a later tiling
251 * change.
252 */
253static unsigned long
254drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
255			    unsigned long pitch, uint32_t tiling_mode)
256{
257	unsigned long tile_width;
258	unsigned long i;
259
260	/* If untiled, then just align it so that we can do rendering
261	 * to it with the 3D engine.
262	 */
263	if (tiling_mode == I915_TILING_NONE)
264		return ALIGN(pitch, 64);
265
266	if (tiling_mode == I915_TILING_X)
267		tile_width = 512;
268	else
269		tile_width = 128;
270
271	/* 965 is flexible */
272	if (bufmgr_gem->gen >= 4)
273		return ROUND_UP_TO(pitch, tile_width);
274
275	/* Pre-965 needs power of two tile width */
276	for (i = tile_width; i < pitch; i <<= 1)
277		;
278
279	return i;
280}
281
282static struct drm_intel_gem_bo_bucket *
283drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
284				 unsigned long size)
285{
286	int i;
287
288	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
289		struct drm_intel_gem_bo_bucket *bucket =
290		    &bufmgr_gem->cache_bucket[i];
291		if (bucket->size >= size) {
292			return bucket;
293		}
294	}
295
296	return NULL;
297}
298
299static void
300drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
301{
302	int i, j;
303
304	for (i = 0; i < bufmgr_gem->exec_count; i++) {
305		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
306		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
307
308		if (bo_gem->relocs == NULL) {
309			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
310			    bo_gem->name);
311			continue;
312		}
313
314		for (j = 0; j < bo_gem->reloc_count; j++) {
315			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
316			drm_intel_bo_gem *target_gem =
317			    (drm_intel_bo_gem *) target_bo;
318
319			DBG("%2d: %d (%s)@0x%08llx -> "
320			    "%d (%s)@0x%08lx + 0x%08x\n",
321			    i,
322			    bo_gem->gem_handle, bo_gem->name,
323			    (unsigned long long)bo_gem->relocs[j].offset,
324			    target_gem->gem_handle,
325			    target_gem->name,
326			    target_bo->offset,
327			    bo_gem->relocs[j].delta);
328		}
329	}
330}
331
332static inline void
333drm_intel_gem_bo_reference(drm_intel_bo *bo)
334{
335	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
336
337	assert(atomic_read(&bo_gem->refcount) > 0);
338	atomic_inc(&bo_gem->refcount);
339}
340
341/**
342 * Adds the given buffer to the list of buffers to be validated (moved into the
343 * appropriate memory type) with the next batch submission.
344 *
345 * If a buffer is validated multiple times in a batch submission, it ends up
346 * with the intersection of the memory type flags and the union of the
347 * access flags.
348 */
349static void
350drm_intel_add_validate_buffer(drm_intel_bo *bo)
351{
352	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
353	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
354	int index;
355
356	if (bo_gem->validate_index != -1)
357		return;
358
359	/* Extend the array of validation entries as necessary. */
360	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
361		int new_size = bufmgr_gem->exec_size * 2;
362
363		if (new_size == 0)
364			new_size = 5;
365
366		bufmgr_gem->exec_objects =
367		    realloc(bufmgr_gem->exec_objects,
368			    sizeof(*bufmgr_gem->exec_objects) * new_size);
369		bufmgr_gem->exec_bos =
370		    realloc(bufmgr_gem->exec_bos,
371			    sizeof(*bufmgr_gem->exec_bos) * new_size);
372		bufmgr_gem->exec_size = new_size;
373	}
374
375	index = bufmgr_gem->exec_count;
376	bo_gem->validate_index = index;
377	/* Fill in array entry */
378	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
379	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
380	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
381	bufmgr_gem->exec_objects[index].alignment = 0;
382	bufmgr_gem->exec_objects[index].offset = 0;
383	bufmgr_gem->exec_bos[index] = bo;
384	bufmgr_gem->exec_count++;
385}
386
387static void
388drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
389{
390	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
391	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
392	int index;
393
394	if (bo_gem->validate_index != -1) {
395		if (need_fence)
396			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
397				EXEC_OBJECT_NEEDS_FENCE;
398		return;
399	}
400
401	/* Extend the array of validation entries as necessary. */
402	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
403		int new_size = bufmgr_gem->exec_size * 2;
404
405		if (new_size == 0)
406			new_size = 5;
407
408		bufmgr_gem->exec2_objects =
409			realloc(bufmgr_gem->exec2_objects,
410				sizeof(*bufmgr_gem->exec2_objects) * new_size);
411		bufmgr_gem->exec_bos =
412			realloc(bufmgr_gem->exec_bos,
413				sizeof(*bufmgr_gem->exec_bos) * new_size);
414		bufmgr_gem->exec_size = new_size;
415	}
416
417	index = bufmgr_gem->exec_count;
418	bo_gem->validate_index = index;
419	/* Fill in array entry */
420	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
421	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
422	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
423	bufmgr_gem->exec2_objects[index].alignment = 0;
424	bufmgr_gem->exec2_objects[index].offset = 0;
425	bufmgr_gem->exec_bos[index] = bo;
426	bufmgr_gem->exec2_objects[index].flags = 0;
427	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
428	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
429	if (need_fence) {
430		bufmgr_gem->exec2_objects[index].flags |=
431			EXEC_OBJECT_NEEDS_FENCE;
432	}
433	bufmgr_gem->exec_count++;
434}
435
436#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
437	sizeof(uint32_t))
438
439static void
440drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
441				      drm_intel_bo_gem *bo_gem)
442{
443	int size;
444
445	assert(!bo_gem->used_as_reloc_target);
446
447	/* The older chipsets are far less flexible in terms of tiling,
448	 * and require tiled buffers to be size-aligned in the aperture.
449	 * This means that in the worst possible case we will need a hole
450	 * twice as large as the object in order for it to fit into the
451	 * aperture. Optimal packing is for wimps.
452	 */
453	size = bo_gem->bo.size;
454	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE)
455		size *= 2;
456
457	bo_gem->reloc_tree_size = size;
458}
459
460static int
461drm_intel_setup_reloc_list(drm_intel_bo *bo)
462{
463	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
464	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
465	unsigned int max_relocs = bufmgr_gem->max_relocs;
466
467	if (bo->size / 4 < max_relocs)
468		max_relocs = bo->size / 4;
469
470	bo_gem->relocs = malloc(max_relocs *
471				sizeof(struct drm_i915_gem_relocation_entry));
472	bo_gem->reloc_target_info = malloc(max_relocs *
473					   sizeof(drm_intel_reloc_target));
474	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
475		bo_gem->has_error = 1;
476
477		free (bo_gem->relocs);
478		bo_gem->relocs = NULL;
479
480		free (bo_gem->reloc_target_info);
481		bo_gem->reloc_target_info = NULL;
482
483		return 1;
484	}
485
486	return 0;
487}
488
489static int
490drm_intel_gem_bo_busy(drm_intel_bo *bo)
491{
492	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
493	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
494	struct drm_i915_gem_busy busy;
495	int ret;
496
497	memset(&busy, 0, sizeof(busy));
498	busy.handle = bo_gem->gem_handle;
499
500	do {
501		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
502	} while (ret == -1 && errno == EINTR);
503
504	return (ret == 0 && busy.busy);
505}
506
507static int
508drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
509				  drm_intel_bo_gem *bo_gem, int state)
510{
511	struct drm_i915_gem_madvise madv;
512
513	madv.handle = bo_gem->gem_handle;
514	madv.madv = state;
515	madv.retained = 1;
516	ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
517
518	return madv.retained;
519}
520
521static int
522drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
523{
524	return drm_intel_gem_bo_madvise_internal
525		((drm_intel_bufmgr_gem *) bo->bufmgr,
526		 (drm_intel_bo_gem *) bo,
527		 madv);
528}
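
/*
 * Madvise sketch (caller-side, via the public drm_intel_bo_madvise()
 * wrapper): hint that a buffer's contents are disposable, and later check
 * whether the kernel kept the pages before reusing them.  A zero return
 * means the backing store was purged.
 *
 *	drm_intel_bo_madvise(bo, I915_MADV_DONTNEED);
 *	// ... later ...
 *	if (!drm_intel_bo_madvise(bo, I915_MADV_WILLNEED)) {
 *		// contents were discarded; refill the buffer before use
 *	}
 */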
529
530/* drop the oldest entries that have been purged by the kernel */
531static void
532drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
533				    struct drm_intel_gem_bo_bucket *bucket)
534{
535	while (!DRMLISTEMPTY(&bucket->head)) {
536		drm_intel_bo_gem *bo_gem;
537
538		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
539				      bucket->head.next, head);
540		if (drm_intel_gem_bo_madvise_internal
541		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
542			break;
543
544		DRMLISTDEL(&bo_gem->head);
545		drm_intel_gem_bo_free(&bo_gem->bo);
546	}
547}
548
549static drm_intel_bo *
550drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
551				const char *name,
552				unsigned long size,
553				unsigned long flags)
554{
555	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
556	drm_intel_bo_gem *bo_gem;
557	unsigned int page_size = getpagesize();
558	int ret;
559	struct drm_intel_gem_bo_bucket *bucket;
560	int alloc_from_cache;
561	unsigned long bo_size;
562	int for_render = 0;
563
564	if (flags & BO_ALLOC_FOR_RENDER)
565		for_render = 1;
566
567	/* Round the allocated size up to a power of two number of pages. */
568	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
569
570	/* If we don't have caching at this size, don't actually round the
571	 * allocation up.
572	 */
573	if (bucket == NULL) {
574		bo_size = size;
575		if (bo_size < page_size)
576			bo_size = page_size;
577	} else {
578		bo_size = bucket->size;
579	}
580
581	pthread_mutex_lock(&bufmgr_gem->lock);
582	/* Get a buffer out of the cache if available */
583retry:
584	alloc_from_cache = 0;
585	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
586		if (for_render) {
587			/* Allocate new render-target BOs from the tail (MRU)
588			 * of the list, as it will likely be hot in the GPU
589			 * cache and in the aperture for us.
590			 */
591			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
592					      bucket->head.prev, head);
593			DRMLISTDEL(&bo_gem->head);
594			alloc_from_cache = 1;
595		} else {
596			/* For non-render-target BOs (where we're probably
597			 * going to map it first thing in order to fill it
598			 * with data), check if the last BO in the cache is
599			 * unbusy, and only reuse in that case. Otherwise,
600			 * allocating a new buffer is probably faster than
601			 * waiting for the GPU to finish.
602			 */
603			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
604					      bucket->head.next, head);
605			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
606				alloc_from_cache = 1;
607				DRMLISTDEL(&bo_gem->head);
608			}
609		}
610
611		if (alloc_from_cache) {
612			if (!drm_intel_gem_bo_madvise_internal
613			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
614				drm_intel_gem_bo_free(&bo_gem->bo);
615				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
616								    bucket);
617				goto retry;
618			}
619		}
620	}
621	pthread_mutex_unlock(&bufmgr_gem->lock);
622
623	if (!alloc_from_cache) {
624		struct drm_i915_gem_create create;
625
626		bo_gem = calloc(1, sizeof(*bo_gem));
627		if (!bo_gem)
628			return NULL;
629
630		bo_gem->bo.size = bo_size;
631		memset(&create, 0, sizeof(create));
632		create.size = bo_size;
633
634		do {
635			ret = ioctl(bufmgr_gem->fd,
636				    DRM_IOCTL_I915_GEM_CREATE,
637				    &create);
638		} while (ret == -1 && errno == EINTR);
639		bo_gem->gem_handle = create.handle;
640		bo_gem->bo.handle = bo_gem->gem_handle;
641		if (ret != 0) {
642			free(bo_gem);
643			return NULL;
644		}
645		bo_gem->bo.bufmgr = bufmgr;
646	}
647
648	bo_gem->name = name;
649	atomic_set(&bo_gem->refcount, 1);
650	bo_gem->validate_index = -1;
651	bo_gem->reloc_tree_fences = 0;
652	bo_gem->used_as_reloc_target = 0;
653	bo_gem->has_error = 0;
654	bo_gem->tiling_mode = I915_TILING_NONE;
655	bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
656	bo_gem->stride = 0;
657	bo_gem->reusable = 1;
658
659	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
660
661	DBG("bo_create: buf %d (%s) %ldb\n",
662	    bo_gem->gem_handle, bo_gem->name, size);
663
664	return &bo_gem->bo;
665}
666
667static drm_intel_bo *
668drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
669				  const char *name,
670				  unsigned long size,
671				  unsigned int alignment)
672{
673	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
674					       BO_ALLOC_FOR_RENDER);
675}
676
677static drm_intel_bo *
678drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
679		       const char *name,
680		       unsigned long size,
681		       unsigned int alignment)
682{
683	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0);
684}
685
686static drm_intel_bo *
687drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
688			     int x, int y, int cpp, uint32_t *tiling_mode,
689			     unsigned long *pitch, unsigned long flags)
690{
691	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
692	drm_intel_bo *bo;
693	unsigned long size, stride;
694	uint32_t tiling;
695	int ret;
696
697	do {
698		unsigned long aligned_y;
699
700		tiling = *tiling_mode;
701
702		/* If we're tiled, our allocations are in 8 or 32-row blocks,
703		 * so failure to align our height means that we won't allocate
704		 * enough pages.
705		 *
706		 * If we're untiled, we still have to align to 2 rows high
707		 * because the data port accesses 2x2 blocks even if the
708		 * bottom row isn't to be rendered, so failure to align means
709		 * we could walk off the end of the GTT and fault.  This is
710		 * documented on 965, and may be the case on older chipsets
711		 * too so we try to be careful.
712		 */
713		aligned_y = y;
714		if (tiling == I915_TILING_NONE)
715			aligned_y = ALIGN(y, 2);
716		else if (tiling == I915_TILING_X)
717			aligned_y = ALIGN(y, 8);
718		else if (tiling == I915_TILING_Y)
719			aligned_y = ALIGN(y, 32);
720
721		stride = x * cpp;
722		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling);
723		size = stride * aligned_y;
724		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
725	} while (*tiling_mode != tiling);
726
727	bo = drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags);
728	if (!bo)
729		return NULL;
730
731	ret = drm_intel_gem_bo_set_tiling(bo, tiling_mode, stride);
732	if (ret != 0) {
733		drm_intel_gem_bo_unreference(bo);
734		return NULL;
735	}
736
737	*pitch = stride;
738
739	return bo;
740}
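
/*
 * Usage sketch for the public drm_intel_bo_alloc_tiled() wrapper
 * (hypothetical caller, error handling trimmed): allocate a 1024x768,
 * 32 bpp X-tiled surface and let the bufmgr pick the final tiling and
 * pitch.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *surf;
 *
 *	surf = drm_intel_bo_alloc_tiled(bufmgr, "surface",
 *					1024, 768, 4, &tiling, &pitch, 0);
 *	if (surf == NULL || tiling != I915_TILING_X) {
 *		// fall back to a linear layout, or fail
 *	}
 */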
741
742/**
743 * Returns a drm_intel_bo wrapping the given buffer object handle.
744 *
745 * This can be used when one application needs to pass a buffer object
746 * to another.
747 */
748drm_intel_bo *
749drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
750				  const char *name,
751				  unsigned int handle)
752{
753	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
754	drm_intel_bo_gem *bo_gem;
755	int ret;
756	struct drm_gem_open open_arg;
757	struct drm_i915_gem_get_tiling get_tiling;
758
759	bo_gem = calloc(1, sizeof(*bo_gem));
760	if (!bo_gem)
761		return NULL;
762
763	memset(&open_arg, 0, sizeof(open_arg));
764	open_arg.name = handle;
765	do {
766		ret = ioctl(bufmgr_gem->fd,
767			    DRM_IOCTL_GEM_OPEN,
768			    &open_arg);
769	} while (ret == -1 && errno == EINTR);
770	if (ret != 0) {
771		fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n",
772			name, handle, strerror(errno));
773		free(bo_gem);
774		return NULL;
775	}
776	bo_gem->bo.size = open_arg.size;
777	bo_gem->bo.offset = 0;
778	bo_gem->bo.virtual = NULL;
779	bo_gem->bo.bufmgr = bufmgr;
780	bo_gem->name = name;
781	atomic_set(&bo_gem->refcount, 1);
782	bo_gem->validate_index = -1;
783	bo_gem->gem_handle = open_arg.handle;
784	bo_gem->global_name = handle;
785	bo_gem->reusable = 0;
786
787	memset(&get_tiling, 0, sizeof(get_tiling));
788	get_tiling.handle = bo_gem->gem_handle;
789	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_TILING, &get_tiling);
790	if (ret != 0) {
791		drm_intel_gem_bo_unreference(&bo_gem->bo);
792		return NULL;
793	}
794	bo_gem->tiling_mode = get_tiling.tiling_mode;
795	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
796	/* XXX stride is unknown */
797	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
798
799	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
800
801	return &bo_gem->bo;
802}
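
/*
 * Sharing sketch: the exporting process publishes a global (flink) name
 * and the importing process opens the object by that name.  How "name"
 * travels between the processes (DRI2 protocol, a pipe, ...) is up to the
 * caller; error handling is omitted here.
 *
 *	// exporter
 *	uint32_t name;
 *	drm_intel_bo_flink(bo, &name);
 *
 *	// importer, with its own bufmgr on the same device
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */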
803
804static void
805drm_intel_gem_bo_free(drm_intel_bo *bo)
806{
807	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
808	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
809	struct drm_gem_close close;
810	int ret;
811
812	if (bo_gem->mem_virtual)
813		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
814	if (bo_gem->gtt_virtual)
815		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
816
817	/* Close this object */
818	memset(&close, 0, sizeof(close));
819	close.handle = bo_gem->gem_handle;
820	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
821	if (ret != 0) {
822		fprintf(stderr,
823			"DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
824			bo_gem->gem_handle, bo_gem->name, strerror(errno));
825	}
826	free(bo);
827}
828
829/** Frees all cached buffers significantly older than @time. */
830static void
831drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
832{
833	int i;
834
835	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
836		struct drm_intel_gem_bo_bucket *bucket =
837		    &bufmgr_gem->cache_bucket[i];
838
839		while (!DRMLISTEMPTY(&bucket->head)) {
840			drm_intel_bo_gem *bo_gem;
841
842			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
843					      bucket->head.next, head);
844			if (time - bo_gem->free_time <= 1)
845				break;
846
847			DRMLISTDEL(&bo_gem->head);
848
849			drm_intel_gem_bo_free(&bo_gem->bo);
850		}
851	}
852}
853
854static void
855drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
856{
857	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
858	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
859	struct drm_intel_gem_bo_bucket *bucket;
860	uint32_t tiling_mode;
861	int i;
862
863	/* Unreference all the target buffers */
864	for (i = 0; i < bo_gem->reloc_count; i++) {
865		if (bo_gem->reloc_target_info[i].bo != bo) {
866			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
867								  reloc_target_info[i].bo,
868								  time);
869		}
870	}
871	bo_gem->reloc_count = 0;
872	bo_gem->used_as_reloc_target = 0;
873
874	DBG("bo_unreference final: %d (%s)\n",
875	    bo_gem->gem_handle, bo_gem->name);
876
877	/* release memory associated with this object */
878	if (bo_gem->reloc_target_info) {
879		free(bo_gem->reloc_target_info);
880		bo_gem->reloc_target_info = NULL;
881	}
882	if (bo_gem->relocs) {
883		free(bo_gem->relocs);
884		bo_gem->relocs = NULL;
885	}
886
887	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
888	/* Put the buffer into our internal cache for reuse if we can. */
889	tiling_mode = I915_TILING_NONE;
890	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
891	    drm_intel_gem_bo_set_tiling(bo, &tiling_mode, 0) == 0 &&
892	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
893					      I915_MADV_DONTNEED)) {
894		bo_gem->free_time = time;
895
896		bo_gem->name = NULL;
897		bo_gem->validate_index = -1;
898
899		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
900
901		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time);
902	} else {
903		drm_intel_gem_bo_free(bo);
904	}
905}
906
907static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
908						      time_t time)
909{
910	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
911
912	assert(atomic_read(&bo_gem->refcount) > 0);
913	if (atomic_dec_and_test(&bo_gem->refcount))
914		drm_intel_gem_bo_unreference_final(bo, time);
915}
916
917static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
918{
919	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
920
921	assert(atomic_read(&bo_gem->refcount) > 0);
922	if (atomic_dec_and_test(&bo_gem->refcount)) {
923		drm_intel_bufmgr_gem *bufmgr_gem =
924		    (drm_intel_bufmgr_gem *) bo->bufmgr;
925		struct timespec time;
926
927		clock_gettime(CLOCK_MONOTONIC, &time);
928
929		pthread_mutex_lock(&bufmgr_gem->lock);
930		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
931		pthread_mutex_unlock(&bufmgr_gem->lock);
932	}
933}
934
935static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
936{
937	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
938	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
939	struct drm_i915_gem_set_domain set_domain;
940	int ret;
941
942	pthread_mutex_lock(&bufmgr_gem->lock);
943
944	/* Allow recursive mapping. Mesa may recursively map buffers with
945	 * nested display loops.
946	 */
947	if (!bo_gem->mem_virtual) {
948		struct drm_i915_gem_mmap mmap_arg;
949
950		DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);
951
952		memset(&mmap_arg, 0, sizeof(mmap_arg));
953		mmap_arg.handle = bo_gem->gem_handle;
954		mmap_arg.offset = 0;
955		mmap_arg.size = bo->size;
956		do {
957			ret = ioctl(bufmgr_gem->fd,
958				    DRM_IOCTL_I915_GEM_MMAP,
959				    &mmap_arg);
960		} while (ret == -1 && errno == EINTR);
961		if (ret != 0) {
962			ret = -errno;
963			fprintf(stderr,
964				"%s:%d: Error mapping buffer %d (%s): %s .\n",
965				__FILE__, __LINE__, bo_gem->gem_handle,
966				bo_gem->name, strerror(errno));
967			pthread_mutex_unlock(&bufmgr_gem->lock);
968			return ret;
969		}
970		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
971	}
972	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
973	    bo_gem->mem_virtual);
974	bo->virtual = bo_gem->mem_virtual;
975
976	set_domain.handle = bo_gem->gem_handle;
977	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
978	if (write_enable)
979		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
980	else
981		set_domain.write_domain = 0;
982	do {
983		ret = ioctl(bufmgr_gem->fd,
984			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
985			    &set_domain);
986	} while (ret == -1 && errno == EINTR);
987	if (ret != 0) {
988		ret = -errno;
989		fprintf(stderr, "%s:%d: Error setting to CPU domain %d: %s\n",
990			__FILE__, __LINE__, bo_gem->gem_handle,
991			strerror(errno));
992		pthread_mutex_unlock(&bufmgr_gem->lock);
993		return ret;
994	}
995
996	pthread_mutex_unlock(&bufmgr_gem->lock);
997
998	return 0;
999}
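
/*
 * CPU-map sketch (public drm_intel_bo_map()/drm_intel_bo_unmap() wrappers;
 * "data" and "data_size" are caller-provided): map for writing, fill, then
 * unmap so the sw_finish flush can happen.
 *
 *	if (drm_intel_bo_map(bo, 1) == 0) {
 *		memcpy(bo->virtual, data, data_size);
 *		drm_intel_bo_unmap(bo);
 *	}
 *
 * For small one-shot uploads, drm_intel_bo_subdata() avoids the mapping
 * entirely.
 */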
1000
1001int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1002{
1003	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1004	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1005	struct drm_i915_gem_set_domain set_domain;
1006	int ret;
1007
1008	pthread_mutex_lock(&bufmgr_gem->lock);
1009
1010	/* Get a mapping of the buffer if we haven't before. */
1011	if (bo_gem->gtt_virtual == NULL) {
1012		struct drm_i915_gem_mmap_gtt mmap_arg;
1013
1014		DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
1015		    bo_gem->name);
1016
1017		memset(&mmap_arg, 0, sizeof(mmap_arg));
1018		mmap_arg.handle = bo_gem->gem_handle;
1019
1020		/* Get the fake offset back... */
1021		do {
1022			ret = ioctl(bufmgr_gem->fd,
1023				    DRM_IOCTL_I915_GEM_MMAP_GTT,
1024				    &mmap_arg);
1025		} while (ret == -1 && errno == EINTR);
1026		if (ret != 0) {
1027			ret = -errno;
1028			fprintf(stderr,
1029				"%s:%d: Error preparing buffer map %d (%s): %s .\n",
1030				__FILE__, __LINE__,
1031				bo_gem->gem_handle, bo_gem->name,
1032				strerror(errno));
1033			pthread_mutex_unlock(&bufmgr_gem->lock);
1034			return ret;
1035		}
1036
1037		/* and mmap it */
1038		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
1039					   MAP_SHARED, bufmgr_gem->fd,
1040					   mmap_arg.offset);
1041		if (bo_gem->gtt_virtual == MAP_FAILED) {
1042			bo_gem->gtt_virtual = NULL;
1043			ret = -errno;
1044			fprintf(stderr,
1045				"%s:%d: Error mapping buffer %d (%s): %s .\n",
1046				__FILE__, __LINE__,
1047				bo_gem->gem_handle, bo_gem->name,
1048				strerror(errno));
1049			pthread_mutex_unlock(&bufmgr_gem->lock);
1050			return ret;
1051		}
1052	}
1053
1054	bo->virtual = bo_gem->gtt_virtual;
1055
1056	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1057	    bo_gem->gtt_virtual);
1058
1059	/* Now move it to the GTT domain so that the CPU caches are flushed */
1060	set_domain.handle = bo_gem->gem_handle;
1061	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1062	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1063	do {
1064		ret = ioctl(bufmgr_gem->fd,
1065			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
1066			    &set_domain);
1067	} while (ret == -1 && errno == EINTR);
1068
1069	if (ret != 0) {
1070		ret = -errno;
1071		fprintf(stderr, "%s:%d: Error setting domain %d: %s\n",
1072			__FILE__, __LINE__, bo_gem->gem_handle,
1073			strerror(errno));
1074	}
1075
1076	pthread_mutex_unlock(&bufmgr_gem->lock);
1077
1078	return ret;
1079}
1080
1081int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1082{
1083	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1084	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1085	int ret = 0;
1086
1087	if (bo == NULL)
1088		return 0;
1089
1090	assert(bo_gem->gtt_virtual != NULL);
1091
1092	pthread_mutex_lock(&bufmgr_gem->lock);
1093	bo->virtual = NULL;
1094	pthread_mutex_unlock(&bufmgr_gem->lock);
1095
1096	return ret;
1097}
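
/*
 * GTT-map sketch: for a tiled buffer the GTT mapping presents a linear
 * view through the fence, so the CPU can address rows with the surface
 * pitch.  "y", "pitch" and "width" are whatever the caller tracked when
 * it allocated the surface.
 *
 *	if (drm_intel_gem_bo_map_gtt(bo) == 0) {
 *		uint8_t *row = (uint8_t *) bo->virtual + y * pitch;
 *		memset(row, 0, width * 4);
 *		drm_intel_gem_bo_unmap_gtt(bo);
 *	}
 */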
1098
1099static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1100{
1101	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1102	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1103	struct drm_i915_gem_sw_finish sw_finish;
1104	int ret;
1105
1106	if (bo == NULL)
1107		return 0;
1108
1109	assert(bo_gem->mem_virtual != NULL);
1110
1111	pthread_mutex_lock(&bufmgr_gem->lock);
1112
1113	/* Cause a flush to happen if the buffer's pinned for scanout, so the
1114	 * results show up in a timely manner.
1115	 */
1116	sw_finish.handle = bo_gem->gem_handle;
1117	do {
1118		ret = ioctl(bufmgr_gem->fd,
1119			    DRM_IOCTL_I915_GEM_SW_FINISH,
1120			    &sw_finish);
1121	} while (ret == -1 && errno == EINTR);
1122	ret = ret == -1 ? -errno : 0;
1123
1124	bo->virtual = NULL;
1125	pthread_mutex_unlock(&bufmgr_gem->lock);
1126
1127	return ret;
1128}
1129
1130static int
1131drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1132			 unsigned long size, const void *data)
1133{
1134	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1135	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1136	struct drm_i915_gem_pwrite pwrite;
1137	int ret;
1138
1139	memset(&pwrite, 0, sizeof(pwrite));
1140	pwrite.handle = bo_gem->gem_handle;
1141	pwrite.offset = offset;
1142	pwrite.size = size;
1143	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1144	do {
1145		ret = ioctl(bufmgr_gem->fd,
1146			    DRM_IOCTL_I915_GEM_PWRITE,
1147			    &pwrite);
1148	} while (ret == -1 && errno == EINTR);
1149	if (ret != 0) {
1150		ret = -errno;
1151		fprintf(stderr,
1152			"%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1153			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1154			(int)size, strerror(errno));
1155	}
1156
1157	return ret;
1158}
1159
1160static int
1161drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1162{
1163	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1164	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1165	int ret;
1166
1167	get_pipe_from_crtc_id.crtc_id = crtc_id;
1168	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1169		    &get_pipe_from_crtc_id);
1170	if (ret != 0) {
1171		/* We return -1 here to signal that we don't
1172		 * know which pipe is associated with this crtc.
1173		 * This lets the caller know that this information
1174		 * isn't available; using the wrong pipe for
1175		 * vblank waiting can cause the chipset to lock up.
1176		 */
1177		return -1;
1178	}
1179
1180	return get_pipe_from_crtc_id.pipe;
1181}
1182
1183static int
1184drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1185			     unsigned long size, void *data)
1186{
1187	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1188	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1189	struct drm_i915_gem_pread pread;
1190	int ret;
1191
1192	memset(&pread, 0, sizeof(pread));
1193	pread.handle = bo_gem->gem_handle;
1194	pread.offset = offset;
1195	pread.size = size;
1196	pread.data_ptr = (uint64_t) (uintptr_t) data;
1197	do {
1198		ret = ioctl(bufmgr_gem->fd,
1199			    DRM_IOCTL_I915_GEM_PREAD,
1200			    &pread);
1201	} while (ret == -1 && errno == EINTR);
1202	if (ret != 0) {
1203		ret = -errno;
1204		fprintf(stderr,
1205			"%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1206			__FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1207			(int)size, strerror(errno));
1208	}
1209
1210	return ret;
1211}
1212
1213/** Waits for all GPU rendering to the object to have completed. */
1214static void
1215drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1216{
1217	drm_intel_gem_bo_start_gtt_access(bo, 0);
1218}
1219
1220/**
1221 * Sets the object to the GTT read and possibly write domain, used by the X
1222 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1223 *
1224 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1225 * can do tiled pixmaps this way.
1226 */
1227void
1228drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1229{
1230	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1231	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1232	struct drm_i915_gem_set_domain set_domain;
1233	int ret;
1234
1235	set_domain.handle = bo_gem->gem_handle;
1236	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1237	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1238	do {
1239		ret = ioctl(bufmgr_gem->fd,
1240			    DRM_IOCTL_I915_GEM_SET_DOMAIN,
1241			    &set_domain);
1242	} while (ret == -1 && errno == EINTR);
1243	if (ret != 0) {
1244		fprintf(stderr,
1245			"%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1246			__FILE__, __LINE__, bo_gem->gem_handle,
1247			set_domain.read_domains, set_domain.write_domain,
1248			strerror(errno));
1249	}
1250}
1251
1252static void
1253drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1254{
1255	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1256	int i;
1257
1258	free(bufmgr_gem->exec2_objects);
1259	free(bufmgr_gem->exec_objects);
1260	free(bufmgr_gem->exec_bos);
1261
1262	pthread_mutex_destroy(&bufmgr_gem->lock);
1263
1264	/* Free any cached buffer objects we were going to reuse */
1265	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1266		struct drm_intel_gem_bo_bucket *bucket =
1267		    &bufmgr_gem->cache_bucket[i];
1268		drm_intel_bo_gem *bo_gem;
1269
1270		while (!DRMLISTEMPTY(&bucket->head)) {
1271			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1272					      bucket->head.next, head);
1273			DRMLISTDEL(&bo_gem->head);
1274
1275			drm_intel_gem_bo_free(&bo_gem->bo);
1276		}
1277	}
1278
1279	free(bufmgr);
1280}
1281
1282/**
1283 * Adds the target buffer to the validation list and adds the relocation
1284 * to the reloc_buffer's relocation list.
1285 *
1286 * The relocation entry at the given offset must already contain the
1287 * precomputed relocation value, because the kernel will optimize out
1288 * the relocation entry write when the buffer hasn't moved from the
1289 * last known offset in target_bo.
1290 */
1291static int
1292do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1293		 drm_intel_bo *target_bo, uint32_t target_offset,
1294		 uint32_t read_domains, uint32_t write_domain,
1295		 int need_fence)
1296{
1297	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1298	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1299	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1300
1301	if (bo_gem->has_error)
1302		return -ENOMEM;
1303
1304	if (target_bo_gem->has_error) {
1305		bo_gem->has_error = 1;
1306		return -ENOMEM;
1307	}
1308
1309	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1310		need_fence = 0;
1311
1312	/* We never use HW fences for rendering on 965+ */
1313	if (bufmgr_gem->gen >= 4)
1314		need_fence = 0;
1315
1316	/* Create a new relocation list if needed */
1317	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1318		return -ENOMEM;
1319
1320	/* Check overflow */
1321	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1322
1323	/* Check args */
1324	assert(offset <= bo->size - 4);
1325	assert((write_domain & (write_domain - 1)) == 0);
1326
1327	/* Make sure that we're not adding a reloc to something whose size has
1328	 * already been accounted for.
1329	 */
1330	assert(!bo_gem->used_as_reloc_target);
1331	if (target_bo_gem != bo_gem) {
1332		target_bo_gem->used_as_reloc_target = 1;
1333		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1334	}
1335	/* An object needing a fence is a tiled buffer, so it won't have
1336	 * relocs to other buffers.
1337	 */
1338	if (need_fence)
1339		target_bo_gem->reloc_tree_fences = 1;
1340	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
1341
1342	/* Flag the target to disallow further relocations in it. */
1343
1344	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1345	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1346	bo_gem->relocs[bo_gem->reloc_count].target_handle =
1347	    target_bo_gem->gem_handle;
1348	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1349	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1350	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
1351
1352	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1353	if (target_bo != bo)
1354		drm_intel_gem_bo_reference(target_bo);
1355	if (need_fence)
1356		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
1357			DRM_INTEL_RELOC_FENCE;
1358	else
1359		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
1360
1361	bo_gem->reloc_count++;
1362
1363	return 0;
1364}
1365
1366static int
1367drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1368			    drm_intel_bo *target_bo, uint32_t target_offset,
1369			    uint32_t read_domains, uint32_t write_domain)
1370{
1371	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1372
1373	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1374				read_domains, write_domain,
1375				!bufmgr_gem->fenced_relocs);
1376}
1377
1378static int
1379drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
1380				  drm_intel_bo *target_bo,
1381				  uint32_t target_offset,
1382				  uint32_t read_domains, uint32_t write_domain)
1383{
1384	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1385				read_domains, write_domain, 1);
1386}
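
/*
 * Relocation sketch (public drm_intel_bo_emit_reloc() wrapper; "batch_bo",
 * "offset" and "target_bo" are illustrative): while writing a command that
 * references target_bo at byte "offset" within the batch, record the reloc
 * so execbuf can patch it later.
 *
 *	drm_intel_bo_emit_reloc(batch_bo, offset,
 *				target_bo, 0,
 *				I915_GEM_DOMAIN_RENDER,
 *				I915_GEM_DOMAIN_RENDER);
 *
 * The batch dword at "offset" must already hold target_bo->offset plus the
 * delta, so the kernel can skip rewriting it when the target has not moved.
 */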
1387
1388/**
1389 * Walk the tree of relocations rooted at BO and accumulate the list of
1390 * validations to be performed and update the relocation buffers with
1391 * index values into the validation list.
1392 */
1393static void
1394drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
1395{
1396	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1397	int i;
1398
1399	if (bo_gem->relocs == NULL)
1400		return;
1401
1402	for (i = 0; i < bo_gem->reloc_count; i++) {
1403		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1404
1405		if (target_bo == bo)
1406			continue;
1407
1408		/* Continue walking the tree depth-first. */
1409		drm_intel_gem_bo_process_reloc(target_bo);
1410
1411		/* Add the target to the validate list */
1412		drm_intel_add_validate_buffer(target_bo);
1413	}
1414}
1415
1416static void
1417drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
1418{
1419	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1420	int i;
1421
1422	if (bo_gem->relocs == NULL)
1423		return;
1424
1425	for (i = 0; i < bo_gem->reloc_count; i++) {
1426		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1427		int need_fence;
1428
1429		if (target_bo == bo)
1430			continue;
1431
1432		/* Continue walking the tree depth-first. */
1433		drm_intel_gem_bo_process_reloc2(target_bo);
1434
1435		need_fence = (bo_gem->reloc_target_info[i].flags &
1436			      DRM_INTEL_RELOC_FENCE);
1437
1438		/* Add the target to the validate list */
1439		drm_intel_add_validate_buffer2(target_bo, need_fence);
1440	}
1441}
1442
1443
1444static void
1445drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
1446{
1447	int i;
1448
1449	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1450		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1451		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1452
1453		/* Update the buffer offset */
1454		if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
1455			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1456			    bo_gem->gem_handle, bo_gem->name, bo->offset,
1457			    (unsigned long long)bufmgr_gem->exec_objects[i].
1458			    offset);
1459			bo->offset = bufmgr_gem->exec_objects[i].offset;
1460		}
1461	}
1462}
1463
1464static void
1465drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
1466{
1467	int i;
1468
1469	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1470		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1471		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1472
1473		/* Update the buffer offset */
1474		if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
1475			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1476			    bo_gem->gem_handle, bo_gem->name, bo->offset,
1477			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
1478			bo->offset = bufmgr_gem->exec2_objects[i].offset;
1479		}
1480	}
1481}
1482
1483static int
1484drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
1485		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
1486{
1487	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1488	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1489	struct drm_i915_gem_execbuffer execbuf;
1490	int ret, i;
1491
1492	if (bo_gem->has_error)
1493		return -ENOMEM;
1494
1495	pthread_mutex_lock(&bufmgr_gem->lock);
1496	/* Update indices and set up the validate list. */
1497	drm_intel_gem_bo_process_reloc(bo);
1498
1499	/* Add the batch buffer to the validation list.  There are no
1500	 * relocations pointing to it.
1501	 */
1502	drm_intel_add_validate_buffer(bo);
1503
1504	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
1505	execbuf.buffer_count = bufmgr_gem->exec_count;
1506	execbuf.batch_start_offset = 0;
1507	execbuf.batch_len = used;
1508	execbuf.cliprects_ptr = (uintptr_t) cliprects;
1509	execbuf.num_cliprects = num_cliprects;
1510	execbuf.DR1 = 0;
1511	execbuf.DR4 = DR4;
1512
1513	do {
1514		ret = ioctl(bufmgr_gem->fd,
1515			    DRM_IOCTL_I915_GEM_EXECBUFFER,
1516			    &execbuf);
1517	} while (ret != 0 && errno == EINTR);
1518
1519	if (ret != 0) {
1520		ret = -errno;
1521		if (errno == ENOSPC) {
1522			fprintf(stderr,
1523				"Execbuffer fails to pin. "
1524				"Estimate: %u. Actual: %u. Available: %u\n",
1525				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1526								   bufmgr_gem->
1527								   exec_count),
1528				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1529								  bufmgr_gem->
1530								  exec_count),
1531				(unsigned int)bufmgr_gem->gtt_size);
1532		}
1533	}
1534	drm_intel_update_buffer_offsets(bufmgr_gem);
1535
1536	if (bufmgr_gem->bufmgr.debug)
1537		drm_intel_gem_dump_validation_list(bufmgr_gem);
1538
1539	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1540		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1541		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1542
1543		/* Disconnect the buffer from the validate list */
1544		bo_gem->validate_index = -1;
1545		bufmgr_gem->exec_bos[i] = NULL;
1546	}
1547	bufmgr_gem->exec_count = 0;
1548	pthread_mutex_unlock(&bufmgr_gem->lock);
1549
1550	return ret;
1551}
1552
1553static int
1554drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
1555			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
1556			int ring_flag)
1557{
1558	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1559	struct drm_i915_gem_execbuffer2 execbuf;
1560	int ret, i;
1561
1562	if ((ring_flag != I915_EXEC_RENDER) && (ring_flag != I915_EXEC_BSD))
1563		return -EINVAL;
1564
1565	pthread_mutex_lock(&bufmgr_gem->lock);
1566	/* Update indices and set up the validate list. */
1567	drm_intel_gem_bo_process_reloc2(bo);
1568
1569	/* Add the batch buffer to the validation list.  There are no relocations
1570	 * pointing to it.
1571	 */
1572	drm_intel_add_validate_buffer2(bo, 0);
1573
1574	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
1575	execbuf.buffer_count = bufmgr_gem->exec_count;
1576	execbuf.batch_start_offset = 0;
1577	execbuf.batch_len = used;
1578	execbuf.cliprects_ptr = (uintptr_t)cliprects;
1579	execbuf.num_cliprects = num_cliprects;
1580	execbuf.DR1 = 0;
1581	execbuf.DR4 = DR4;
1582	execbuf.flags = ring_flag;
1583	execbuf.rsvd1 = 0;
1584	execbuf.rsvd2 = 0;
1585
1586	do {
1587		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_EXECBUFFER2,
1588			    &execbuf);
1589	} while (ret != 0 && errno == EINTR);
1590
1591	if (ret != 0) {
1592		ret = -errno;
1593		if (ret == -ENOMEM) {
1594			fprintf(stderr,
1595				"Execbuffer fails to pin. "
1596				"Estimate: %u. Actual: %u. Available: %u\n",
1597				drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1598								   bufmgr_gem->exec_count),
1599				drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1600								  bufmgr_gem->exec_count),
1601				(unsigned int) bufmgr_gem->gtt_size);
1602		}
1603	}
1604	drm_intel_update_buffer_offsets2(bufmgr_gem);
1605
1606	if (bufmgr_gem->bufmgr.debug)
1607		drm_intel_gem_dump_validation_list(bufmgr_gem);
1608
1609	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1610		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1611		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1612
1613		/* Disconnect the buffer from the validate list */
1614		bo_gem->validate_index = -1;
1615		bufmgr_gem->exec_bos[i] = NULL;
1616	}
1617	bufmgr_gem->exec_count = 0;
1618	pthread_mutex_unlock(&bufmgr_gem->lock);
1619
1620	return ret;
1621}
1622
1623static int
1624drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
1625		       drm_clip_rect_t *cliprects, int num_cliprects,
1626		       int DR4)
1627{
1628	return drm_intel_gem_bo_mrb_exec2(bo, used,
1629					cliprects, num_cliprects, DR4,
1630					I915_EXEC_RENDER);
1631}
1632
1633static int
1634drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
1635{
1636	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1637	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1638	struct drm_i915_gem_pin pin;
1639	int ret;
1640
1641	memset(&pin, 0, sizeof(pin));
1642	pin.handle = bo_gem->gem_handle;
1643	pin.alignment = alignment;
1644
1645	do {
1646		ret = ioctl(bufmgr_gem->fd,
1647			    DRM_IOCTL_I915_GEM_PIN,
1648			    &pin);
1649	} while (ret == -1 && errno == EINTR);
1650
1651	if (ret != 0)
1652		return -errno;
1653
1654	bo->offset = pin.offset;
1655	return 0;
1656}
1657
1658static int
1659drm_intel_gem_bo_unpin(drm_intel_bo *bo)
1660{
1661	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1662	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1663	struct drm_i915_gem_unpin unpin;
1664	int ret;
1665
1666	memset(&unpin, 0, sizeof(unpin));
1667	unpin.handle = bo_gem->gem_handle;
1668
1669	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
1670	if (ret != 0)
1671		return -errno;
1672
1673	return 0;
1674}
1675
1676static int
1677drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
1678			    uint32_t stride)
1679{
1680	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1681	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1682	struct drm_i915_gem_set_tiling set_tiling;
1683	int ret;
1684
1685	if (bo_gem->global_name == 0)
1686		return 0;
1687
1688	if (*tiling_mode == bo_gem->tiling_mode &&
1689	    stride == bo_gem->stride)
1690		return 0;
1691
1692	memset(&set_tiling, 0, sizeof(set_tiling));
1693	set_tiling.handle = bo_gem->gem_handle;
1694
1695	do {
1696		set_tiling.tiling_mode = *tiling_mode;
1697		set_tiling.stride = stride;
1698
1699		ret = ioctl(bufmgr_gem->fd,
1700			    DRM_IOCTL_I915_GEM_SET_TILING,
1701			    &set_tiling);
1702	} while (ret == -1 && errno == EINTR);
1703	if (ret == 0) {
1704		bo_gem->tiling_mode = set_tiling.tiling_mode;
1705		bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1706		bo_gem->stride = stride;
1707		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
1708	} else
1709		ret = -errno;
1710
1711	*tiling_mode = bo_gem->tiling_mode;
1712	return ret;
1713}
1714
1715static int
1716drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
1717			    uint32_t * swizzle_mode)
1718{
1719	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1720
1721	*tiling_mode = bo_gem->tiling_mode;
1722	*swizzle_mode = bo_gem->swizzle_mode;
1723	return 0;
1724}
1725
1726static int
1727drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
1728{
1729	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1730	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1731	struct drm_gem_flink flink;
1732	int ret;
1733
1734	if (!bo_gem->global_name) {
1735		memset(&flink, 0, sizeof(flink));
1736		flink.handle = bo_gem->gem_handle;
1737
1738		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
1739		if (ret != 0)
1740			return -errno;
1741		bo_gem->global_name = flink.name;
1742		bo_gem->reusable = 0;
1743	}
1744
1745	*name = bo_gem->global_name;
1746	return 0;
1747}
1748
1749/**
1750 * Enables unlimited caching of buffer objects for reuse.
1751 *
1752 * This is potentially very memory expensive, as the cache at each bucket
1753 * size is only bounded by how many buffers of that size we've managed to have
1754 * in flight at once.
1755 */
1756void
1757drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
1758{
1759	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1760
1761	bufmgr_gem->bo_reuse = 1;
1762}
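
/*
 * Typical setup sketch (assumes the caller has already opened a DRM fd;
 * the 4096 batch size is only an example value):
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 4096);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 */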
1763
1764/**
1765 * Enable use of fenced reloc type.
1766 *
1767 * New code should enable this to avoid unnecessary fence register
1768 * allocation.  If this option is not enabled, all relocs will have a
1769 * fence register allocated.
1770 */
1771void
1772drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
1773{
1774	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
1775
1776	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
1777		bufmgr_gem->fenced_relocs = 1;
1778}
1779
1780/**
1781 * Return the additional aperture space required by the tree of buffer objects
1782 * rooted at bo.
1783 */
1784static int
1785drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
1786{
1787	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1788	int i;
1789	int total = 0;
1790
1791	if (bo == NULL || bo_gem->included_in_check_aperture)
1792		return 0;
1793
1794	total += bo->size;
1795	bo_gem->included_in_check_aperture = 1;
1796
1797	for (i = 0; i < bo_gem->reloc_count; i++)
1798		total +=
1799		    drm_intel_gem_bo_get_aperture_space(bo_gem->
1800							reloc_target_info[i].bo);
1801
1802	return total;
1803}
1804
1805/**
1806 * Count the number of buffers in this list that need a fence reg
1807 *
1808 * If the count is greater than the number of available regs, we'll have
1809 * to ask the caller to resubmit a batch with fewer tiled buffers.
1810 *
1811 * This function over-counts if the same buffer is used multiple times.
1812 */
1813static unsigned int
1814drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
1815{
1816	int i;
1817	unsigned int total = 0;
1818
1819	for (i = 0; i < count; i++) {
1820		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
1821
1822		if (bo_gem == NULL)
1823			continue;
1824
1825		total += bo_gem->reloc_tree_fences;
1826	}
1827	return total;
1828}
1829
1830/**
1831 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
1832 * for the next drm_intel_bufmgr_check_aperture_space() call.
1833 */
1834static void
1835drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
1836{
1837	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1838	int i;
1839
1840	if (bo == NULL || !bo_gem->included_in_check_aperture)
1841		return;
1842
1843	bo_gem->included_in_check_aperture = 0;
1844
1845	for (i = 0; i < bo_gem->reloc_count; i++)
1846		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
1847							   reloc_target_info[i].bo);
1848}
1849
1850/**
1851 * Return a conservative estimate for the amount of aperture required
1852 * for a collection of buffers. This may double-count some buffers.
1853 */
1854static unsigned int
1855drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
1856{
1857	int i;
1858	unsigned int total = 0;
1859
1860	for (i = 0; i < count; i++) {
1861		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
1862		if (bo_gem != NULL)
1863			total += bo_gem->reloc_tree_size;
1864	}
1865	return total;
1866}
1867
1868/**
1869 * Return the amount of aperture needed for a collection of buffers.
1870 * This avoids double counting any buffers, at the cost of looking
1871 * at every buffer in the set.
1872 */
1873static unsigned int
1874drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
1875{
1876	int i;
1877	unsigned int total = 0;
1878
1879	for (i = 0; i < count; i++) {
1880		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
1881		/* For the first buffer object in the array, we get an
1882		 * accurate count back for its reloc_tree size (since nothing
1883		 * had been flagged as being counted yet).  We can save that
1884		 * value out as a more conservative reloc_tree_size that
1885		 * avoids double-counting target buffers.  Since the first
1886		 * buffer happens to usually be the batch buffer in our
1887		 * callers, this can pull us back from doing the tree
1888		 * walk on every new batch emit.
1889		 */
1890		if (i == 0) {
1891			drm_intel_bo_gem *bo_gem =
1892			    (drm_intel_bo_gem *) bo_array[i];
1893			bo_gem->reloc_tree_size = total;
1894		}
1895	}
1896
1897	for (i = 0; i < count; i++)
1898		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
1899	return total;
1900}
1901
1902/**
1903 * Return -ENOSPC if the batchbuffer should be flushed before attempting to
1904 * emit rendering referencing the buffers pointed to by bo_array.
1905 *
1906 * This is required because if we try to emit a batchbuffer with relocations
1907 * to a tree of buffers that won't simultaneously fit in the aperture,
1908 * the rendering will return an error at a point where the software is not
1909 * prepared to recover from it.
1910 *
1911 * However, we also want to emit the batchbuffer well before we reach the
1912 * limit: if each batchbuffer in a series references buffers covering almost
1913 * all of the aperture, then every emit ends up waiting to evict a buffer
1914 * from the previous rendering, and the CPU and GPU run synchronously.  By
1915 * emitting smaller batchbuffers, we trade some CPU overhead for better
1916 * parallelism.
1917 */
1918static int
1919drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
1920{
1921	drm_intel_bufmgr_gem *bufmgr_gem =
1922	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
1923	unsigned int total = 0;
1924	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
1925	int total_fences;
1926
1927	/* Check for fence reg constraints if necessary */
1928	if (bufmgr_gem->available_fences) {
1929		total_fences = drm_intel_gem_total_fences(bo_array, count);
1930		if (total_fences > bufmgr_gem->available_fences)
1931			return -ENOSPC;
1932	}
1933
1934	total = drm_intel_gem_estimate_batch_space(bo_array, count);
1935
1936	if (total > threshold)
1937		total = drm_intel_gem_compute_batch_space(bo_array, count);
1938
1939	if (total > threshold) {
1940		DBG("check_space: overflowed available aperture, "
1941		    "%dkb vs %dkb\n",
1942		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
1943		return -ENOSPC;
1944	} else {
1945		DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
1946		    (int)bufmgr_gem->gtt_size / 1024);
1947		return 0;
1948	}
1949}
1950
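/*
 * Illustrative usage sketch: callers would typically go through the public
 * drm_intel_bufmgr_check_aperture_space() wrapper before adding more
 * relocations to a batch, and flush when it reports -ENOSPC.  The batch_bo,
 * target_bo and flush_batch() names below stand in for the caller's own
 * state.
 *
 *	drm_intel_bo *check[2] = { batch_bo, target_bo };
 *
 *	if (drm_intel_bufmgr_check_aperture_space(check, 2) != 0)
 *		flush_batch();
 */
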
1951/*
1952 * Disable buffer reuse for objects that are shared with the kernel
1953 * as scanout buffers.
1954 */
1955static int
1956drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
1957{
1958	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1959
1960	bo_gem->reusable = 0;
1961	return 0;
1962}
1963
1964static int
1965drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
1966{
1967	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1968
1969	return bo_gem->reusable;
1970}
1971
1972static int
1973_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
1974{
1975	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1976	int i;
1977
1978	for (i = 0; i < bo_gem->reloc_count; i++) {
1979		if (bo_gem->reloc_target_info[i].bo == target_bo)
1980			return 1;
1981		if (bo == bo_gem->reloc_target_info[i].bo)
1982			continue;
1983		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
1984						target_bo))
1985			return 1;
1986	}
1987
1988	return 0;
1989}
1990
1991/** Return true if target_bo is referenced by bo's relocation tree. */
1992static int
1993drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
1994{
1995	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1996
1997	if (bo == NULL || target_bo == NULL)
1998		return 0;
1999	if (target_bo_gem->used_as_reloc_target)
2000		return _drm_intel_gem_bo_references(bo, target_bo);
2001	return 0;
2002}
2003
2004static void
2005add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
2006{
2007	unsigned int i = bufmgr_gem->num_buckets;
2008
2009	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
2010
2011	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
2012	bufmgr_gem->cache_bucket[i].size = size;
2013	bufmgr_gem->num_buckets++;
2014}
2015
2016static void
2017init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
2018{
2019	unsigned long size, cache_max_size = 64 * 1024 * 1024;
2020
2021	/* Power-of-two buckets alone proved too wasteful of memory, so
2022	 * add 3 intermediate sizes between each power of two to cover
2023	 * allocation sizes accurately enough.  (The alternative would be
2024	 * exact size matching, assuming that for things like composited
2025	 * window resizes the tiled width/height alignment and the
2026	 * rounding of sizes up to pages would still give useful cache
2027	 * hit rates anyway.)
2028	 */
2029	add_bucket(bufmgr_gem, 4096);
2030	add_bucket(bufmgr_gem, 4096 * 2);
2031	add_bucket(bufmgr_gem, 4096 * 3);
2032
2033	/* Initialize the linked lists for BO reuse cache. */
2034	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2035		add_bucket(bufmgr_gem, size);
2036
2037		add_bucket(bufmgr_gem, size + size * 1 / 4);
2038		add_bucket(bufmgr_gem, size + size * 2 / 4);
2039		add_bucket(bufmgr_gem, size + size * 3 / 4);
2040	}
2041}
2042
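/*
 * For reference, the code above creates buckets of 4, 8 and 12 KiB, then,
 * for each power-of-two size from 16 KiB through 64 MiB, the size itself
 * plus 1.25x, 1.5x and 1.75x that size (16/20/24/28 KiB, 32/40/48/56 KiB,
 * ..., 64/80/96/112 MiB): 55 buckets in total, which fits within the
 * 14 * 4 entry cache_bucket array.
 */
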
2043/**
2044 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2045 * and manage buffer objects.
2046 *
2047 * \param fd File descriptor of the opened DRM device.
2048 */
2049drm_intel_bufmgr *
2050drm_intel_bufmgr_gem_init(int fd, int batch_size)
2051{
2052	drm_intel_bufmgr_gem *bufmgr_gem;
2053	struct drm_i915_gem_get_aperture aperture;
2054	drm_i915_getparam_t gp;
2055	int ret;
2056	int exec2 = 0, has_bsd = 0;
2057
2058	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
2059	if (bufmgr_gem == NULL)
2060		return NULL;
2061
2062	bufmgr_gem->fd = fd;
2063
2064	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
2065		free(bufmgr_gem);
2066		return NULL;
2067	}
2068
2069	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture);
2070
2071	if (ret == 0)
2072		bufmgr_gem->gtt_size = aperture.aper_available_size;
2073	else {
2074		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
2075			strerror(errno));
2076		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
2077		fprintf(stderr, "Assuming %dkB available aperture size.\n"
2078			"May lead to reduced performance or incorrect "
2079			"rendering.\n",
2080			(int)bufmgr_gem->gtt_size / 1024);
2081	}
2082
2083	gp.param = I915_PARAM_CHIPSET_ID;
2084	gp.value = &bufmgr_gem->pci_device;
2085	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2086	if (ret) {
2087		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
2088		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
2089	}
2090
2091	if (IS_GEN2(bufmgr_gem))
2092		bufmgr_gem->gen = 2;
2093	else if (IS_GEN3(bufmgr_gem))
2094		bufmgr_gem->gen = 3;
2095	else if (IS_GEN4(bufmgr_gem))
2096		bufmgr_gem->gen = 4;
2097	else
2098		bufmgr_gem->gen = 6;
2099
2100	gp.param = I915_PARAM_HAS_EXECBUF2;
2101	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2102	if (!ret)
2103		exec2 = 1;
2104
2105	gp.param = I915_PARAM_HAS_BSD;
2106	ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2107	if (!ret)
2108		has_bsd = 1;
2109
2110	if (bufmgr_gem->gen < 4) {
2111		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
2112		gp.value = &bufmgr_gem->available_fences;
2113		ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2114		if (ret) {
2115			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
2116				errno);
2117			fprintf(stderr, "param: %d, val: %d\n", gp.param,
2118				*gp.value);
2119			bufmgr_gem->available_fences = 0;
2120		} else {
2121			/* XXX The kernel reports the total number of fences,
2122			 * including any that may be pinned.
2123			 *
2124			 * We presume that there will be at least one pinned
2125			 * fence for the scanout buffer, but there may be more
2126			 * than one scanout and the user may be manually
2127			 * pinning buffers. Let's move to execbuffer2 and
2128			 * thereby forget the insanity of using fences...
2129			 */
2130			bufmgr_gem->available_fences -= 2;
2131			if (bufmgr_gem->available_fences < 0)
2132				bufmgr_gem->available_fences = 0;
2133		}
2134	}
2135
2136	/* Let's go with one relocation for every 2 dwords (but round down a bit,
2137	 * since an exact power of two would mean an extra page allocation for
2138	 * the reloc buffer).
2139	 *
2140	 * One relocation per 4 dwords was too few for the blender benchmark.
2141	 */
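	/* e.g. with a 16 KiB batch: 16384 / 4 / 2 - 2 = 2046 relocations. */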
2142	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
2143
2144	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
2145	bufmgr_gem->bufmgr.bo_alloc_for_render =
2146	    drm_intel_gem_bo_alloc_for_render;
2147	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
2148	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
2149	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
2150	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
2151	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
2152	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
2153	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
2154	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
2155	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
2156	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
2157	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
2158	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
2159	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
2160	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
2161	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
2162	/* Use execbuffer2 if the kernel supports it */
2163	if (exec2) {
2164		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
2165		if (has_bsd)
2166			bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
2167	} else
2168		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
2169	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
2170	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
2171	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
2172	bufmgr_gem->bufmgr.debug = 0;
2173	bufmgr_gem->bufmgr.check_aperture_space =
2174	    drm_intel_gem_check_aperture_space;
2175	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
2176	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
2177	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
2178	    drm_intel_gem_get_pipe_from_crtc_id;
2179	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
2180
2181	init_cache_buckets(bufmgr_gem);
2182
2183	return &bufmgr_gem->bufmgr;
2184}
2185
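/*
 * Illustrative usage sketch of a minimal client of this buffer manager,
 * assuming "fd" is an already-open DRM device node; error handling is
 * omitted and the buffer name and sizes are arbitrary examples.
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *
 *	drm_intel_bo_map(bo, 1);
 *	memset(bo->virtual, 0, 4096);
 *	drm_intel_bo_unmap(bo);
 *
 *	drm_intel_bo_unreference(bo);
 *	drm_intel_bufmgr_destroy(bufmgr);
 */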