intel_bufmgr_gem.c revision 515cea6ac67eb458c59fececc3c67411ee6fd3c3
1/**************************************************************************
2 *
3 * Copyright © 2007 Red Hat Inc.
4 * Copyright © 2007 Intel Corporation
5 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23 *
24 * The above copyright notice and this permission notice (including the
25 * next paragraph) shall be included in all copies or substantial portions
26 * of the Software.
27 *
28 *
29 **************************************************************************/
30/*
31 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
32 *          Keith Whitwell <keithw-at-tungstengraphics-dot-com>
33 *	    Eric Anholt <eric@anholt.net>
34 *	    Dave Airlie <airlied@linux.ie>
35 */
36
37#ifdef HAVE_CONFIG_H
38#include "config.h"
39#endif
40
41#include <xf86drm.h>
42#include <xf86atomic.h>
43#include <fcntl.h>
44#include <stdio.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <assert.h>
49#include <pthread.h>
50#include <sys/ioctl.h>
51#include <sys/mman.h>
52#include <sys/stat.h>
53#include <sys/types.h>
54#include <stdbool.h>
55
56#include <errno.h>
57#include "libdrm_lists.h"
58#include "intel_bufmgr.h"
59#include "intel_bufmgr_priv.h"
60#include "intel_chipset.h"
61#include <string.h>
62
63#include "i915_drm.h"
64
65#define DBG(...) do {					\
66	if (bufmgr_gem->bufmgr.debug)			\
67		fprintf(stderr, __VA_ARGS__);		\
68} while (0)
69
70#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
71
72typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
73
74struct drm_intel_gem_bo_bucket {
75	drmMMListHead head;
76	unsigned long size;
77};
78
79typedef struct _drm_intel_bufmgr_gem {
80	drm_intel_bufmgr bufmgr;
81
82	int fd;
83
84	int max_relocs;
85
86	pthread_mutex_t lock;
87
88	struct drm_i915_gem_exec_object *exec_objects;
89	struct drm_i915_gem_exec_object2 *exec2_objects;
90	drm_intel_bo **exec_bos;
91	int exec_size;
92	int exec_count;
93
94	/** Array of lists of cached gem objects of power-of-two sizes */
95	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
96	int num_buckets;
97	time_t time;
98
99	drmMMListHead named;
100
101	uint64_t gtt_size;
102	int available_fences;
103	int pci_device;
104	int gen;
105	unsigned int has_bsd : 1;
106	unsigned int has_blt : 1;
107	unsigned int has_relaxed_fencing : 1;
108	unsigned int bo_reuse : 1;
109	bool fenced_relocs;
110} drm_intel_bufmgr_gem;
111
112#define DRM_INTEL_RELOC_FENCE (1<<0)
113
114typedef struct _drm_intel_reloc_target_info {
115	drm_intel_bo *bo;
116	int flags;
117} drm_intel_reloc_target;
118
119struct _drm_intel_bo_gem {
120	drm_intel_bo bo;
121
122	atomic_t refcount;
123	uint32_t gem_handle;
124	const char *name;
125
126	/**
127	 * Kernel-assigned global name for this object
128	 */
129	unsigned int global_name;
130	drmMMListHead name_list;
131
132	/**
133	 * Index of the buffer within the validation list while preparing a
134	 * batchbuffer execution.
135	 */
136	int validate_index;
137
138	/**
139	 * Current tiling mode
140	 */
141	uint32_t tiling_mode;
142	uint32_t swizzle_mode;
143	unsigned long stride;
144
145	time_t free_time;
146
147	/** Array passed to the DRM containing relocation information. */
148	struct drm_i915_gem_relocation_entry *relocs;
149	/**
150	 * Array of info structs corresponding to relocs[i].target_handle etc
151	 */
152	drm_intel_reloc_target *reloc_target_info;
153	/** Number of entries in relocs */
154	int reloc_count;
155	/** Mapped address for the buffer, saved across map/unmap cycles */
156	void *mem_virtual;
157	/** GTT virtual address for the buffer, saved across map/unmap cycles */
158	void *gtt_virtual;
159
160	/** BO cache list */
161	drmMMListHead head;
162
163	/**
164	 * Boolean of whether this BO and its children have been included in
165	 * the current drm_intel_bufmgr_check_aperture_space() total.
166	 */
167	bool included_in_check_aperture;
168
169	/**
170	 * Boolean of whether this buffer has been used as a relocation
171	 * target and had its size accounted for, and thus can't have any
172	 * further relocations added to it.
173	 */
174	bool used_as_reloc_target;
175
176	/**
177	 * Boolean of whether we have encountered an error whilst building the relocation tree.
178	 */
179	bool has_error;
180
181	/**
182	 * Boolean of whether this buffer can be re-used
183	 */
184	bool reusable;
185
186	/**
187	 * Size in bytes of this buffer and its relocation descendants.
188	 *
189	 * Used to avoid costly tree walking in
190	 * drm_intel_bufmgr_check_aperture in the common case.
191	 */
192	int reloc_tree_size;
193
194	/**
195	 * Number of potential fence registers required by this buffer and its
196	 * relocations.
197	 */
198	int reloc_tree_fences;
199};
200
201static unsigned int
202drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
203
204static unsigned int
205drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
206
207static int
208drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
209			    uint32_t * swizzle_mode);
210
211static int
212drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
213				     uint32_t tiling_mode,
214				     uint32_t stride);
215
216static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
217						      time_t time);
218
219static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
220
221static void drm_intel_gem_bo_free(drm_intel_bo *bo);
222
223static unsigned long
224drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
225			   uint32_t *tiling_mode)
226{
227	unsigned long min_size, max_size;
228	unsigned long i;
229
230	if (*tiling_mode == I915_TILING_NONE)
231		return size;
232
233	/* 965+ just need multiples of page size for tiling */
234	if (bufmgr_gem->gen >= 4)
235		return ROUND_UP_TO(size, 4096);
236
237	/* Older chips need powers of two, of at least 512k or 1M */
238	if (bufmgr_gem->gen == 3) {
239		min_size = 1024*1024;
240		max_size = 128*1024*1024;
241	} else {
242		min_size = 512*1024;
243		max_size = 64*1024*1024;
244	}
245
246	if (size > max_size) {
247		*tiling_mode = I915_TILING_NONE;
248		return size;
249	}
250
251	/* Do we need to allocate every page for the fence? */
252	if (bufmgr_gem->has_relaxed_fencing)
253		return ROUND_UP_TO(size, 4096);
254
255	for (i = min_size; i < size; i <<= 1)
256		;
257
258	return i;
259}
260
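/*
 * Illustrative example (editor's sketch, not part of the original file):
 * on a gen3 part without relaxed fencing, a 1.5MiB X-tiled request is
 * rounded up to the next power of two at or above the 1MiB minimum:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long sz = drm_intel_gem_bo_tile_size(bufmgr_gem,
 *						      1536 * 1024, &tiling);
 *	// sz == 2 * 1024 * 1024; tiling is left as I915_TILING_X
 *
 * On gen4+ (or with relaxed fencing) the same request only has to be a
 * multiple of the 4096-byte page size.
 */
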
261/*
262 * Round a given pitch up to the minimum required for X tiling on a
263 * given chip.  We use 512 as the minimum to allow for a later tiling
264 * change.
265 */
266static unsigned long
267drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
268			    unsigned long pitch, uint32_t *tiling_mode)
269{
270	unsigned long tile_width;
271	unsigned long i;
272
273	/* If untiled, then just align it so that we can do rendering
274	 * to it with the 3D engine.
275	 */
276	if (*tiling_mode == I915_TILING_NONE)
277		return ALIGN(pitch, 64);
278
279	if (*tiling_mode == I915_TILING_X
280			|| (IS_915(bufmgr_gem) && *tiling_mode == I915_TILING_Y))
281		tile_width = 512;
282	else
283		tile_width = 128;
284
285	/* 965 is flexible */
286	if (bufmgr_gem->gen >= 4)
287		return ROUND_UP_TO(pitch, tile_width);
288
289	/* The older hardware has a maximum pitch of 8192 with tiled
290	 * surfaces, so fall back to untiled if it's too large.
291	 */
292	if (pitch > 8192) {
293		*tiling_mode = I915_TILING_NONE;
294		return ALIGN(pitch, 64);
295	}
296
297	/* Pre-965 needs power of two tile width */
298	for (i = tile_width; i < pitch; i <<= 1)
299		;
300
301	return i;
302}
303
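/*
 * Illustrative example (editor's sketch): a 300-pixel-wide, 32bpp X-tiled
 * surface requests a pitch of 1200 bytes.  Pre-965 hardware needs a
 * power-of-two multiple of the 512-byte tile width, so the pitch becomes
 * 2048; gen4+ merely rounds up to 1536, the next multiple of 512.
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch = drm_intel_gem_bo_tile_pitch(bufmgr_gem,
 *							  300 * 4, &tiling);
 */
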
304static struct drm_intel_gem_bo_bucket *
305drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
306				 unsigned long size)
307{
308	int i;
309
310	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
311		struct drm_intel_gem_bo_bucket *bucket =
312		    &bufmgr_gem->cache_bucket[i];
313		if (bucket->size >= size) {
314			return bucket;
315		}
316	}
317
318	return NULL;
319}
320
321static void
322drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
323{
324	int i, j;
325
326	for (i = 0; i < bufmgr_gem->exec_count; i++) {
327		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
328		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
329
330		if (bo_gem->relocs == NULL) {
331			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
332			    bo_gem->name);
333			continue;
334		}
335
336		for (j = 0; j < bo_gem->reloc_count; j++) {
337			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
338			drm_intel_bo_gem *target_gem =
339			    (drm_intel_bo_gem *) target_bo;
340
341			DBG("%2d: %d (%s)@0x%08llx -> "
342			    "%d (%s)@0x%08lx + 0x%08x\n",
343			    i,
344			    bo_gem->gem_handle, bo_gem->name,
345			    (unsigned long long)bo_gem->relocs[j].offset,
346			    target_gem->gem_handle,
347			    target_gem->name,
348			    target_bo->offset,
349			    bo_gem->relocs[j].delta);
350		}
351	}
352}
353
354static inline void
355drm_intel_gem_bo_reference(drm_intel_bo *bo)
356{
357	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
358
359	atomic_inc(&bo_gem->refcount);
360}
361
362/**
363 * Adds the given buffer to the list of buffers to be validated (moved into the
364 * appropriate memory type) with the next batch submission.
365 *
366 * If a buffer is validated multiple times in a batch submission, it ends up
367 * with the intersection of the memory type flags and the union of the
368 * access flags.
369 */
370static void
371drm_intel_add_validate_buffer(drm_intel_bo *bo)
372{
373	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
374	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
375	int index;
376
377	if (bo_gem->validate_index != -1)
378		return;
379
380	/* Extend the array of validation entries as necessary. */
381	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
382		int new_size = bufmgr_gem->exec_size * 2;
383
384		if (new_size == 0)
385			new_size = 5;
386
387		bufmgr_gem->exec_objects =
388		    realloc(bufmgr_gem->exec_objects,
389			    sizeof(*bufmgr_gem->exec_objects) * new_size);
390		bufmgr_gem->exec_bos =
391		    realloc(bufmgr_gem->exec_bos,
392			    sizeof(*bufmgr_gem->exec_bos) * new_size);
393		bufmgr_gem->exec_size = new_size;
394	}
395
396	index = bufmgr_gem->exec_count;
397	bo_gem->validate_index = index;
398	/* Fill in array entry */
399	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
400	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
401	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
402	bufmgr_gem->exec_objects[index].alignment = 0;
403	bufmgr_gem->exec_objects[index].offset = 0;
404	bufmgr_gem->exec_bos[index] = bo;
405	bufmgr_gem->exec_count++;
406}
407
408static void
409drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
410{
411	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
412	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
413	int index;
414
415	if (bo_gem->validate_index != -1) {
416		if (need_fence)
417			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
418				EXEC_OBJECT_NEEDS_FENCE;
419		return;
420	}
421
422	/* Extend the array of validation entries as necessary. */
423	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
424		int new_size = bufmgr_gem->exec_size * 2;
425
426		if (new_size == 0)
427			new_size = 5;
428
429		bufmgr_gem->exec2_objects =
430			realloc(bufmgr_gem->exec2_objects,
431				sizeof(*bufmgr_gem->exec2_objects) * new_size);
432		bufmgr_gem->exec_bos =
433			realloc(bufmgr_gem->exec_bos,
434				sizeof(*bufmgr_gem->exec_bos) * new_size);
435		bufmgr_gem->exec_size = new_size;
436	}
437
438	index = bufmgr_gem->exec_count;
439	bo_gem->validate_index = index;
440	/* Fill in array entry */
441	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
442	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
443	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
444	bufmgr_gem->exec2_objects[index].alignment = 0;
445	bufmgr_gem->exec2_objects[index].offset = 0;
446	bufmgr_gem->exec_bos[index] = bo;
447	bufmgr_gem->exec2_objects[index].flags = 0;
448	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
449	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
450	if (need_fence) {
451		bufmgr_gem->exec2_objects[index].flags |=
452			EXEC_OBJECT_NEEDS_FENCE;
453	}
454	bufmgr_gem->exec_count++;
455}
456
457#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
458	sizeof(uint32_t))
459
460static void
461drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
462				      drm_intel_bo_gem *bo_gem)
463{
464	int size;
465
466	assert(!bo_gem->used_as_reloc_target);
467
468	/* The older chipsets are far less flexible in terms of tiling,
469	 * and require tiled buffers to be size-aligned in the aperture.
470	 * This means that in the worst possible case we will need a hole
471	 * twice as large as the object in order for it to fit into the
472	 * aperture. Optimal packing is for wimps.
473	 */
474	size = bo_gem->bo.size;
475	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
476		int min_size;
477
478		if (bufmgr_gem->has_relaxed_fencing) {
479			if (bufmgr_gem->gen == 3)
480				min_size = 1024*1024;
481			else
482				min_size = 512*1024;
483
484			while (min_size < size)
485				min_size *= 2;
486		} else
487			min_size = size;
488
489		/* Account for worst-case alignment. */
490		size = 2 * min_size;
491	}
492
493	bo_gem->reloc_tree_size = size;
494}
495
496static int
497drm_intel_setup_reloc_list(drm_intel_bo *bo)
498{
499	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
500	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
501	unsigned int max_relocs = bufmgr_gem->max_relocs;
502
503	if (bo->size / 4 < max_relocs)
504		max_relocs = bo->size / 4;
505
506	bo_gem->relocs = malloc(max_relocs *
507				sizeof(struct drm_i915_gem_relocation_entry));
508	bo_gem->reloc_target_info = malloc(max_relocs *
509					   sizeof(drm_intel_reloc_target));
510	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
511		bo_gem->has_error = true;
512
513		free (bo_gem->relocs);
514		bo_gem->relocs = NULL;
515
516		free (bo_gem->reloc_target_info);
517		bo_gem->reloc_target_info = NULL;
518
519		return 1;
520	}
521
522	return 0;
523}
524
525static int
526drm_intel_gem_bo_busy(drm_intel_bo *bo)
527{
528	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
529	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
530	struct drm_i915_gem_busy busy;
531	int ret;
532
533	memset(&busy, 0, sizeof(busy));
534	busy.handle = bo_gem->gem_handle;
535
536	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
537
538	return (ret == 0 && busy.busy);
539}
540
541static int
542drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
543				  drm_intel_bo_gem *bo_gem, int state)
544{
545	struct drm_i915_gem_madvise madv;
546
547	madv.handle = bo_gem->gem_handle;
548	madv.madv = state;
549	madv.retained = 1;
550	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
551
552	return madv.retained;
553}
554
555static int
556drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
557{
558	return drm_intel_gem_bo_madvise_internal
559		((drm_intel_bufmgr_gem *) bo->bufmgr,
560		 (drm_intel_bo_gem *) bo,
561		 madv);
562}
563
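/*
 * Illustrative use of the public wrapper (hypothetical caller, not part of
 * this file): an application can volunteer an idle buffer's pages to the
 * kernel and must check that they were retained before trusting the old
 * contents again:
 *
 *	drm_intel_bo_madvise(bo, I915_MADV_DONTNEED);
 *	...
 *	if (!drm_intel_bo_madvise(bo, I915_MADV_WILLNEED)) {
 *		// pages were purged; regenerate the contents
 *	}
 *
 * The BO cache below relies on the same pattern to keep idle buffers
 * purgeable.
 */
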
564/* drop the oldest entries that have been purged by the kernel */
565static void
566drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
567				    struct drm_intel_gem_bo_bucket *bucket)
568{
569	while (!DRMLISTEMPTY(&bucket->head)) {
570		drm_intel_bo_gem *bo_gem;
571
572		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
573				      bucket->head.next, head);
574		if (drm_intel_gem_bo_madvise_internal
575		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
576			break;
577
578		DRMLISTDEL(&bo_gem->head);
579		drm_intel_gem_bo_free(&bo_gem->bo);
580	}
581}
582
583static drm_intel_bo *
584drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
585				const char *name,
586				unsigned long size,
587				unsigned long flags,
588				uint32_t tiling_mode,
589				unsigned long stride)
590{
591	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
592	drm_intel_bo_gem *bo_gem;
593	unsigned int page_size = getpagesize();
594	int ret;
595	struct drm_intel_gem_bo_bucket *bucket;
596	bool alloc_from_cache;
597	unsigned long bo_size;
598	bool for_render = false;
599
600	if (flags & BO_ALLOC_FOR_RENDER)
601		for_render = true;
602
603	/* Round the allocated size up to a power of two number of pages. */
604	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
605
606	/* If we don't have caching at this size, don't actually round the
607	 * allocation up.
608	 */
609	if (bucket == NULL) {
610		bo_size = size;
611		if (bo_size < page_size)
612			bo_size = page_size;
613	} else {
614		bo_size = bucket->size;
615	}
616
617	pthread_mutex_lock(&bufmgr_gem->lock);
618	/* Get a buffer out of the cache if available */
619retry:
620	alloc_from_cache = false;
621	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
622		if (for_render) {
623			/* Allocate new render-target BOs from the tail (MRU)
624			 * of the list, as it will likely be hot in the GPU
625			 * cache and in the aperture for us.
626			 */
627			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
628					      bucket->head.prev, head);
629			DRMLISTDEL(&bo_gem->head);
630			alloc_from_cache = true;
631		} else {
632			/* For non-render-target BOs (where we're probably
633			 * going to map it first thing in order to fill it
634			 * with data), check if the last BO in the cache is
635			 * unbusy, and only reuse in that case. Otherwise,
636			 * allocating a new buffer is probably faster than
637			 * waiting for the GPU to finish.
638			 */
639			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
640					      bucket->head.next, head);
641			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
642				alloc_from_cache = true;
643				DRMLISTDEL(&bo_gem->head);
644			}
645		}
646
647		if (alloc_from_cache) {
648			if (!drm_intel_gem_bo_madvise_internal
649			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
650				drm_intel_gem_bo_free(&bo_gem->bo);
651				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
652								    bucket);
653				goto retry;
654			}
655
656			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
657								 tiling_mode,
658								 stride)) {
659				drm_intel_gem_bo_free(&bo_gem->bo);
660				goto retry;
661			}
662		}
663	}
664	pthread_mutex_unlock(&bufmgr_gem->lock);
665
666	if (!alloc_from_cache) {
667		struct drm_i915_gem_create create;
668
669		bo_gem = calloc(1, sizeof(*bo_gem));
670		if (!bo_gem)
671			return NULL;
672
673		bo_gem->bo.size = bo_size;
674		memset(&create, 0, sizeof(create));
675		create.size = bo_size;
676
677		ret = drmIoctl(bufmgr_gem->fd,
678			       DRM_IOCTL_I915_GEM_CREATE,
679			       &create);
680		bo_gem->gem_handle = create.handle;
681		bo_gem->bo.handle = bo_gem->gem_handle;
682		if (ret != 0) {
683			free(bo_gem);
684			return NULL;
685		}
686		bo_gem->bo.bufmgr = bufmgr;
687
688		bo_gem->tiling_mode = I915_TILING_NONE;
689		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
690		bo_gem->stride = 0;
691
692		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
693							 tiling_mode,
694							 stride)) {
695		    drm_intel_gem_bo_free(&bo_gem->bo);
696		    return NULL;
697		}
698
699		DRMINITLISTHEAD(&bo_gem->name_list);
700	}
701
702	bo_gem->name = name;
703	atomic_set(&bo_gem->refcount, 1);
704	bo_gem->validate_index = -1;
705	bo_gem->reloc_tree_fences = 0;
706	bo_gem->used_as_reloc_target = false;
707	bo_gem->has_error = false;
708	bo_gem->reusable = true;
709
710	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
711
712	DBG("bo_create: buf %d (%s) %ldb\n",
713	    bo_gem->gem_handle, bo_gem->name, size);
714
715	return &bo_gem->bo;
716}
717
718static drm_intel_bo *
719drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
720				  const char *name,
721				  unsigned long size,
722				  unsigned int alignment)
723{
724	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
725					       BO_ALLOC_FOR_RENDER,
726					       I915_TILING_NONE, 0);
727}
728
729static drm_intel_bo *
730drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
731		       const char *name,
732		       unsigned long size,
733		       unsigned int alignment)
734{
735	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
736					       I915_TILING_NONE, 0);
737}
738
739static drm_intel_bo *
740drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
741			     int x, int y, int cpp, uint32_t *tiling_mode,
742			     unsigned long *pitch, unsigned long flags)
743{
744	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
745	unsigned long size, stride;
746	uint32_t tiling;
747
748	do {
749		unsigned long aligned_y, height_alignment;
750
751		tiling = *tiling_mode;
752
753		/* If we're tiled, our allocations are in 8 or 32-row blocks,
754		 * so failure to align our height means that we won't allocate
755		 * enough pages.
756		 *
757		 * If we're untiled, we still have to align to 2 rows high
758		 * because the data port accesses 2x2 blocks even if the
759		 * bottom row isn't to be rendered, so failure to align means
760		 * we could walk off the end of the GTT and fault.  This is
761		 * documented on 965, and may be the case on older chipsets
762		 * too so we try to be careful.
763		 */
764		aligned_y = y;
765		height_alignment = 2;
766
767		if (IS_GEN2(bufmgr_gem) && tiling != I915_TILING_NONE)
768			height_alignment = 16;
769		else if (tiling == I915_TILING_X
770			|| (IS_915(bufmgr_gem) && tiling == I915_TILING_Y))
771			height_alignment = 8;
772		else if (tiling == I915_TILING_Y)
773			height_alignment = 32;
774		aligned_y = ALIGN(y, height_alignment);
775
776		stride = x * cpp;
777		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
778		size = stride * aligned_y;
779		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
780	} while (*tiling_mode != tiling);
781	*pitch = stride;
782
783	if (tiling == I915_TILING_NONE)
784		stride = 0;
785
786	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
787					       tiling, stride);
788}
789
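/*
 * Illustrative use through the public entry point (hypothetical caller):
 * allocating a 1024x768, 32bpp X-tiled render target.  The requested
 * tiling may be downgraded (e.g. to I915_TILING_NONE for overly large
 * surfaces), so the caller reads back both the tiling mode and the pitch:
 *
 *	uint32_t tiling = I915_TILING_X;
 *	unsigned long pitch;
 *	drm_intel_bo *bo = drm_intel_bo_alloc_tiled(bufmgr, "render target",
 *						    1024, 768, 4,
 *						    &tiling, &pitch,
 *						    BO_ALLOC_FOR_RENDER);
 */
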
790/**
791 * Returns a drm_intel_bo wrapping the given buffer object handle.
792 *
793 * This can be used when one application needs to pass a buffer object
794 * to another.
795 */
796drm_intel_bo *
797drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
798				  const char *name,
799				  unsigned int handle)
800{
801	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
802	drm_intel_bo_gem *bo_gem;
803	int ret;
804	struct drm_gem_open open_arg;
805	struct drm_i915_gem_get_tiling get_tiling;
806	drmMMListHead *list;
807
808	/* At the moment most applications only have a few named BOs.
809	 * For instance, in a DRI client only the render buffers passed
810	 * between X and the client are named. And since X returns the
811	 * alternating names for the front/back buffer a linear search
812	 * provides a sufficiently fast match.
813	 */
814	for (list = bufmgr_gem->named.next;
815	     list != &bufmgr_gem->named;
816	     list = list->next) {
817		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
818		if (bo_gem->global_name == handle) {
819			drm_intel_gem_bo_reference(&bo_gem->bo);
820			return &bo_gem->bo;
821		}
822	}
823
824	bo_gem = calloc(1, sizeof(*bo_gem));
825	if (!bo_gem)
826		return NULL;
827
828	memset(&open_arg, 0, sizeof(open_arg));
829	open_arg.name = handle;
830	ret = drmIoctl(bufmgr_gem->fd,
831		       DRM_IOCTL_GEM_OPEN,
832		       &open_arg);
833	if (ret != 0) {
834		DBG("Couldn't reference %s handle 0x%08x: %s\n",
835		    name, handle, strerror(errno));
836		free(bo_gem);
837		return NULL;
838	}
839	bo_gem->bo.size = open_arg.size;
840	bo_gem->bo.offset = 0;
841	bo_gem->bo.virtual = NULL;
842	bo_gem->bo.bufmgr = bufmgr;
843	bo_gem->name = name;
844	atomic_set(&bo_gem->refcount, 1);
845	bo_gem->validate_index = -1;
846	bo_gem->gem_handle = open_arg.handle;
847	bo_gem->bo.handle = open_arg.handle;
848	bo_gem->global_name = handle;
849	bo_gem->reusable = false;
850
851	memset(&get_tiling, 0, sizeof(get_tiling));
852	get_tiling.handle = bo_gem->gem_handle;
853	ret = drmIoctl(bufmgr_gem->fd,
854		       DRM_IOCTL_I915_GEM_GET_TILING,
855		       &get_tiling);
856	if (ret != 0) {
857		drm_intel_gem_bo_unreference(&bo_gem->bo);
858		return NULL;
859	}
860	bo_gem->tiling_mode = get_tiling.tiling_mode;
861	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
862	/* XXX stride is unknown */
863	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
864
865	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
866	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
867
868	return &bo_gem->bo;
869}
870
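/*
 * Illustrative cross-process sharing (hypothetical code, not part of this
 * file): the exporting process publishes a global "flink" name and the
 * importing process turns that name back into a drm_intel_bo:
 *
 *	// exporter
 *	uint32_t name;
 *	drm_intel_bo_flink(bo, &name);
 *
 *	// importer (separate process, same device)
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 *
 * Named buffers are marked non-reusable, so they never land in the cache.
 */
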
871static void
872drm_intel_gem_bo_free(drm_intel_bo *bo)
873{
874	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
875	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
876	struct drm_gem_close close;
877	int ret;
878
879	if (bo_gem->mem_virtual)
880		munmap(bo_gem->mem_virtual, bo_gem->bo.size);
881	if (bo_gem->gtt_virtual)
882		munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
883
884	/* Close this object */
885	memset(&close, 0, sizeof(close));
886	close.handle = bo_gem->gem_handle;
887	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
888	if (ret != 0) {
889		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
890		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
891	}
892	free(bo);
893}
894
895/** Frees all cached buffers significantly older than @time. */
896static void
897drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
898{
899	int i;
900
901	if (bufmgr_gem->time == time)
902		return;
903
904	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
905		struct drm_intel_gem_bo_bucket *bucket =
906		    &bufmgr_gem->cache_bucket[i];
907
908		while (!DRMLISTEMPTY(&bucket->head)) {
909			drm_intel_bo_gem *bo_gem;
910
911			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
912					      bucket->head.next, head);
913			if (time - bo_gem->free_time <= 1)
914				break;
915
916			DRMLISTDEL(&bo_gem->head);
917
918			drm_intel_gem_bo_free(&bo_gem->bo);
919		}
920	}
921
922	bufmgr_gem->time = time;
923}
924
925static void
926drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
927{
928	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
929	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
930	struct drm_intel_gem_bo_bucket *bucket;
931	int i;
932
933	/* Unreference all the target buffers */
934	for (i = 0; i < bo_gem->reloc_count; i++) {
935		if (bo_gem->reloc_target_info[i].bo != bo) {
936			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
937								  reloc_target_info[i].bo,
938								  time);
939		}
940	}
941	bo_gem->reloc_count = 0;
942	bo_gem->used_as_reloc_target = false;
943
944	DBG("bo_unreference final: %d (%s)\n",
945	    bo_gem->gem_handle, bo_gem->name);
946
947	/* release memory associated with this object */
948	if (bo_gem->reloc_target_info) {
949		free(bo_gem->reloc_target_info);
950		bo_gem->reloc_target_info = NULL;
951	}
952	if (bo_gem->relocs) {
953		free(bo_gem->relocs);
954		bo_gem->relocs = NULL;
955	}
956
957	DRMLISTDEL(&bo_gem->name_list);
958
959	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
960	/* Put the buffer into our internal cache for reuse if we can. */
961	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
962	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
963					      I915_MADV_DONTNEED)) {
964		bo_gem->free_time = time;
965
966		bo_gem->name = NULL;
967		bo_gem->validate_index = -1;
968
969		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
970	} else {
971		drm_intel_gem_bo_free(bo);
972	}
973}
974
975static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
976						      time_t time)
977{
978	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
979
980	assert(atomic_read(&bo_gem->refcount) > 0);
981	if (atomic_dec_and_test(&bo_gem->refcount))
982		drm_intel_gem_bo_unreference_final(bo, time);
983}
984
985static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
986{
987	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
988
989	assert(atomic_read(&bo_gem->refcount) > 0);
990	if (atomic_dec_and_test(&bo_gem->refcount)) {
991		drm_intel_bufmgr_gem *bufmgr_gem =
992		    (drm_intel_bufmgr_gem *) bo->bufmgr;
993		struct timespec time;
994
995		clock_gettime(CLOCK_MONOTONIC, &time);
996
997		pthread_mutex_lock(&bufmgr_gem->lock);
998		drm_intel_gem_bo_unreference_final(bo, time.tv_sec);
999		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, time.tv_sec);
1000		pthread_mutex_unlock(&bufmgr_gem->lock);
1001	}
1002}
1003
1004static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1005{
1006	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1007	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1008	struct drm_i915_gem_set_domain set_domain;
1009	int ret;
1010
1011	pthread_mutex_lock(&bufmgr_gem->lock);
1012
1013	/* Allow recursive mapping. Mesa may recursively map buffers with
1014	 * nested display loops.
1015	 */
1016	if (!bo_gem->mem_virtual) {
1017		struct drm_i915_gem_mmap mmap_arg;
1018
1019		DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name);
1020
1021		memset(&mmap_arg, 0, sizeof(mmap_arg));
1022		mmap_arg.handle = bo_gem->gem_handle;
1023		mmap_arg.offset = 0;
1024		mmap_arg.size = bo->size;
1025		ret = drmIoctl(bufmgr_gem->fd,
1026			       DRM_IOCTL_I915_GEM_MMAP,
1027			       &mmap_arg);
1028		if (ret != 0) {
1029			ret = -errno;
1030			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1031			    __FILE__, __LINE__, bo_gem->gem_handle,
1032			    bo_gem->name, strerror(errno));
1033			pthread_mutex_unlock(&bufmgr_gem->lock);
1034			return ret;
1035		}
1036		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1037	}
1038	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1039	    bo_gem->mem_virtual);
1040	bo->virtual = bo_gem->mem_virtual;
1041
1042	set_domain.handle = bo_gem->gem_handle;
1043	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1044	if (write_enable)
1045		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1046	else
1047		set_domain.write_domain = 0;
1048	ret = drmIoctl(bufmgr_gem->fd,
1049		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1050		       &set_domain);
1051	if (ret != 0) {
1052		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1053		    __FILE__, __LINE__, bo_gem->gem_handle,
1054		    strerror(errno));
1055	}
1056
1057	pthread_mutex_unlock(&bufmgr_gem->lock);
1058
1059	return 0;
1060}
1061
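/*
 * Illustrative CPU-map usage (hypothetical caller; "data" and "size" are
 * placeholders): upload through the cached CPU mapping, then unmap so the
 * kernel can flush scanout buffers via SW_FINISH:
 *
 *	if (drm_intel_bo_map(bo, 1) == 0) {	// 1 = write enable
 *		memcpy(bo->virtual, data, size);
 *		drm_intel_bo_unmap(bo);
 *	}
 *
 * The mmap itself is cached in mem_virtual, so repeated maps of the same
 * buffer only pay for the set_domain ioctl.
 */
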
1062int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1063{
1064	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1065	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1066	struct drm_i915_gem_set_domain set_domain;
1067	int ret;
1068
1069	pthread_mutex_lock(&bufmgr_gem->lock);
1070
1071	/* Get a mapping of the buffer if we haven't before. */
1072	if (bo_gem->gtt_virtual == NULL) {
1073		struct drm_i915_gem_mmap_gtt mmap_arg;
1074
1075		DBG("bo_map_gtt: mmap %d (%s)\n", bo_gem->gem_handle,
1076		    bo_gem->name);
1077
1078		memset(&mmap_arg, 0, sizeof(mmap_arg));
1079		mmap_arg.handle = bo_gem->gem_handle;
1080
1081		/* Get the fake offset back... */
1082		ret = drmIoctl(bufmgr_gem->fd,
1083			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1084			       &mmap_arg);
1085		if (ret != 0) {
1086			ret = -errno;
1087			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1088			    __FILE__, __LINE__,
1089			    bo_gem->gem_handle, bo_gem->name,
1090			    strerror(errno));
1091			pthread_mutex_unlock(&bufmgr_gem->lock);
1092			return ret;
1093		}
1094
1095		/* and mmap it */
1096		bo_gem->gtt_virtual = mmap(0, bo->size, PROT_READ | PROT_WRITE,
1097					   MAP_SHARED, bufmgr_gem->fd,
1098					   mmap_arg.offset);
1099		if (bo_gem->gtt_virtual == MAP_FAILED) {
1100			bo_gem->gtt_virtual = NULL;
1101			ret = -errno;
1102			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1103			    __FILE__, __LINE__,
1104			    bo_gem->gem_handle, bo_gem->name,
1105			    strerror(errno));
1106			pthread_mutex_unlock(&bufmgr_gem->lock);
1107			return ret;
1108		}
1109	}
1110
1111	bo->virtual = bo_gem->gtt_virtual;
1112
1113	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1114	    bo_gem->gtt_virtual);
1115
1116	/* Now move it to the GTT domain so that the CPU caches are flushed */
1117	set_domain.handle = bo_gem->gem_handle;
1118	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1119	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1120	ret = drmIoctl(bufmgr_gem->fd,
1121		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1122		       &set_domain);
1123	if (ret != 0) {
1124		DBG("%s:%d: Error setting domain %d: %s\n",
1125		    __FILE__, __LINE__, bo_gem->gem_handle,
1126		    strerror(errno));
1127	}
1128
1129	pthread_mutex_unlock(&bufmgr_gem->lock);
1130
1131	return 0;
1132}
1133
1134int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1135{
1136	drm_intel_bufmgr_gem *bufmgr_gem;
1137	int ret = 0;
1138
1139	if (bo == NULL)
1140		return 0;
1141	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1142	pthread_mutex_lock(&bufmgr_gem->lock);
1143	bo->virtual = NULL;
1144	pthread_mutex_unlock(&bufmgr_gem->lock);
1145
1146	return ret;
1147}
1148
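/*
 * Illustrative GTT-map usage (hypothetical caller; "data" and "size" are
 * placeholders): for tiled buffers the GTT mapping presents a linear,
 * fence-detiled view to the CPU, which is why 2D drivers prefer it over
 * drm_intel_bo_map():
 *
 *	if (drm_intel_gem_bo_map_gtt(bo) == 0) {
 *		memcpy(bo->virtual, data, size);	// write-combined
 *		drm_intel_gem_bo_unmap_gtt(bo);
 *	}
 */
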
1149static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1150{
1151	drm_intel_bufmgr_gem *bufmgr_gem;
1152	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1153	struct drm_i915_gem_sw_finish sw_finish;
1154	int ret;
1155
1156	if (bo == NULL)
1157		return 0;
1158	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1159	pthread_mutex_lock(&bufmgr_gem->lock);
1160
1161	/* Cause a flush to happen if the buffer's pinned for scanout, so the
1162	 * results show up in a timely manner.
1163	 */
1164	sw_finish.handle = bo_gem->gem_handle;
1165	ret = drmIoctl(bufmgr_gem->fd,
1166		       DRM_IOCTL_I915_GEM_SW_FINISH,
1167		       &sw_finish);
1168	ret = ret == -1 ? -errno : 0;
1169
1170	bo->virtual = NULL;
1171	pthread_mutex_unlock(&bufmgr_gem->lock);
1172
1173	return ret;
1174}
1175
1176static int
1177drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1178			 unsigned long size, const void *data)
1179{
1180	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1181	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1182	struct drm_i915_gem_pwrite pwrite;
1183	int ret;
1184
1185	memset(&pwrite, 0, sizeof(pwrite));
1186	pwrite.handle = bo_gem->gem_handle;
1187	pwrite.offset = offset;
1188	pwrite.size = size;
1189	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1190	ret = drmIoctl(bufmgr_gem->fd,
1191		       DRM_IOCTL_I915_GEM_PWRITE,
1192		       &pwrite);
1193	if (ret != 0) {
1194		ret = -errno;
1195		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1196		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1197		    (int)size, strerror(errno));
1198	}
1199
1200	return ret;
1201}
1202
1203static int
1204drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1205{
1206	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1207	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1208	int ret;
1209
1210	get_pipe_from_crtc_id.crtc_id = crtc_id;
1211	ret = drmIoctl(bufmgr_gem->fd,
1212		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1213		       &get_pipe_from_crtc_id);
1214	if (ret != 0) {
1215		/* We return -1 here to signal that we don't
1216		 * know which pipe is associated with this crtc.
1217		 * This lets the caller know that this information
1218		 * isn't available; using the wrong pipe for
1219		 * vblank waiting can cause the chipset to lock up
1220		 */
1221		return -1;
1222	}
1223
1224	return get_pipe_from_crtc_id.pipe;
1225}
1226
1227static int
1228drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1229			     unsigned long size, void *data)
1230{
1231	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1232	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1233	struct drm_i915_gem_pread pread;
1234	int ret;
1235
1236	memset(&pread, 0, sizeof(pread));
1237	pread.handle = bo_gem->gem_handle;
1238	pread.offset = offset;
1239	pread.size = size;
1240	pread.data_ptr = (uint64_t) (uintptr_t) data;
1241	ret = drmIoctl(bufmgr_gem->fd,
1242		       DRM_IOCTL_I915_GEM_PREAD,
1243		       &pread);
1244	if (ret != 0) {
1245		ret = -errno;
1246		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1247		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1248		    (int)size, strerror(errno));
1249	}
1250
1251	return ret;
1252}
1253
1254/** Waits for all GPU rendering with the object to have completed. */
1255static void
1256drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1257{
1258	drm_intel_gem_bo_start_gtt_access(bo, 1);
1259}
1260
1261/**
1262 * Sets the object to the GTT read and possibly write domain, used by the X
1263 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1264 *
1265 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1266 * can do tiled pixmaps this way.
1267 */
1268void
1269drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1270{
1271	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1272	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1273	struct drm_i915_gem_set_domain set_domain;
1274	int ret;
1275
1276	set_domain.handle = bo_gem->gem_handle;
1277	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1278	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1279	ret = drmIoctl(bufmgr_gem->fd,
1280		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1281		       &set_domain);
1282	if (ret != 0) {
1283		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1284		    __FILE__, __LINE__, bo_gem->gem_handle,
1285		    set_domain.read_domains, set_domain.write_domain,
1286		    strerror(errno));
1287	}
1288}
1289
1290static void
1291drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1292{
1293	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1294	int i;
1295
1296	free(bufmgr_gem->exec2_objects);
1297	free(bufmgr_gem->exec_objects);
1298	free(bufmgr_gem->exec_bos);
1299
1300	pthread_mutex_destroy(&bufmgr_gem->lock);
1301
1302	/* Free any cached buffer objects we were going to reuse */
1303	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1304		struct drm_intel_gem_bo_bucket *bucket =
1305		    &bufmgr_gem->cache_bucket[i];
1306		drm_intel_bo_gem *bo_gem;
1307
1308		while (!DRMLISTEMPTY(&bucket->head)) {
1309			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1310					      bucket->head.next, head);
1311			DRMLISTDEL(&bo_gem->head);
1312
1313			drm_intel_gem_bo_free(&bo_gem->bo);
1314		}
1315	}
1316
1317	free(bufmgr);
1318}
1319
1320/**
1321 * Adds the target buffer to the validation list and adds the relocation
1322 * to the reloc_buffer's relocation list.
1323 *
1324 * The relocation entry at the given offset must already contain the
1325 * precomputed relocation value, because the kernel will optimize out
1326 * the relocation entry write when the buffer hasn't moved from the
1327 * last known offset in target_bo.
1328 */
1329static int
1330do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1331		 drm_intel_bo *target_bo, uint32_t target_offset,
1332		 uint32_t read_domains, uint32_t write_domain,
1333		 bool need_fence)
1334{
1335	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1336	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1337	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1338	bool fenced_command;
1339
1340	if (bo_gem->has_error)
1341		return -ENOMEM;
1342
1343	if (target_bo_gem->has_error) {
1344		bo_gem->has_error = true;
1345		return -ENOMEM;
1346	}
1347
1348	/* We never use HW fences for rendering on 965+ */
1349	if (bufmgr_gem->gen >= 4)
1350		need_fence = false;
1351
1352	fenced_command = need_fence;
1353	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1354		need_fence = false;
1355
1356	/* Create a new relocation list if needed */
1357	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1358		return -ENOMEM;
1359
1360	/* Check overflow */
1361	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1362
1363	/* Check args */
1364	assert(offset <= bo->size - 4);
1365	assert((write_domain & (write_domain - 1)) == 0);
1366
1367	/* Make sure that we're not adding a reloc to something whose size has
1368	 * already been accounted for.
1369	 */
1370	assert(!bo_gem->used_as_reloc_target);
1371	if (target_bo_gem != bo_gem) {
1372		target_bo_gem->used_as_reloc_target = true;
1373		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1374	}
1375	/* An object needing a fence is a tiled buffer, so it won't have
1376	 * relocs to other buffers.
1377	 */
1378	if (need_fence)
1379		target_bo_gem->reloc_tree_fences = 1;
1380	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
1381
1382	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1383	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1384	bo_gem->relocs[bo_gem->reloc_count].target_handle =
1385	    target_bo_gem->gem_handle;
1386	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1387	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
1388	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset;
1389
1390	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1391	if (target_bo != bo)
1392		drm_intel_gem_bo_reference(target_bo);
1393	if (fenced_command)
1394		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
1395			DRM_INTEL_RELOC_FENCE;
1396	else
1397		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
1398
1399	bo_gem->reloc_count++;
1400
1401	return 0;
1402}
1403
1404static int
1405drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1406			    drm_intel_bo *target_bo, uint32_t target_offset,
1407			    uint32_t read_domains, uint32_t write_domain)
1408{
1409	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1410
1411	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1412				read_domains, write_domain,
1413				!bufmgr_gem->fenced_relocs);
1414}
1415
1416static int
1417drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
1418				  drm_intel_bo *target_bo,
1419				  uint32_t target_offset,
1420				  uint32_t read_domains, uint32_t write_domain)
1421{
1422	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1423				read_domains, write_domain, true);
1424}
1425
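/*
 * Illustrative relocation usage (hypothetical caller; batch, batch_bo,
 * target, delta and n are placeholders): the batch stores the presumed GPU
 * address of a render target and records a matching relocation, so the
 * kernel only patches the dword if the target has actually moved:
 *
 *	batch[n] = target->offset + delta;	// precomputed value
 *	drm_intel_bo_emit_reloc(batch_bo, n * 4,	// byte offset
 *				target, delta,
 *				I915_GEM_DOMAIN_RENDER,
 *				I915_GEM_DOMAIN_RENDER);
 */
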
1426int
1427drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
1428{
1429	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1430
1431	return bo_gem->reloc_count;
1432}
1433
1434/**
1435 * Removes existing relocation entries in the BO after "start".
1436 *
1437 * This allows a user to avoid a two-step process for state setup with
1438 * counting up all the buffer objects and doing a
1439 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
1440 * relocations for the state setup.  Instead, save the state of the
1441 * batchbuffer including drm_intel_gem_get_reloc_count(), emit all the
1442 * state, and then check if it still fits in the aperture.
1443 *
1444 * Any further drm_intel_bufmgr_check_aperture_space() queries
1445 * involving this buffer in the tree are undefined after this call.
1446 */
1447void
1448drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
1449{
1450	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1451	int i;
1452	struct timespec time;
1453
1454	clock_gettime(CLOCK_MONOTONIC, &time);
1455
1456	assert(bo_gem->reloc_count >= start);
1457	/* Unreference the cleared target buffers */
1458	for (i = start; i < bo_gem->reloc_count; i++) {
1459		if (bo_gem->reloc_target_info[i].bo != bo) {
1460			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1461								  reloc_target_info[i].bo,
1462								  time.tv_sec);
1463		}
1464	}
1465	bo_gem->reloc_count = start;
1466}
1467
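/*
 * Illustrative roll-back pattern described above (hypothetical caller;
 * emit_my_state() is a placeholder): record the relocation count, emit the
 * state, and discard the new relocations if the batch no longer fits:
 *
 *	int saved = drm_intel_gem_bo_get_reloc_count(batch_bo);
 *	emit_my_state(batch_bo);
 *	if (drm_intel_bufmgr_check_aperture_space(&batch_bo, 1) != 0) {
 *		drm_intel_gem_bo_clear_relocs(batch_bo, saved);
 *		// flush the current batch and retry with an empty one
 *	}
 */
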
1468/**
1469 * Walk the tree of relocations rooted at BO and accumulate the list of
1470 * validations to be performed and update the relocation buffers with
1471 * index values into the validation list.
1472 */
1473static void
1474drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
1475{
1476	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1477	int i;
1478
1479	if (bo_gem->relocs == NULL)
1480		return;
1481
1482	for (i = 0; i < bo_gem->reloc_count; i++) {
1483		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1484
1485		if (target_bo == bo)
1486			continue;
1487
1488		/* Continue walking the tree depth-first. */
1489		drm_intel_gem_bo_process_reloc(target_bo);
1490
1491		/* Add the target to the validate list */
1492		drm_intel_add_validate_buffer(target_bo);
1493	}
1494}
1495
1496static void
1497drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
1498{
1499	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1500	int i;
1501
1502	if (bo_gem->relocs == NULL)
1503		return;
1504
1505	for (i = 0; i < bo_gem->reloc_count; i++) {
1506		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1507		int need_fence;
1508
1509		if (target_bo == bo)
1510			continue;
1511
1512		/* Continue walking the tree depth-first. */
1513		drm_intel_gem_bo_process_reloc2(target_bo);
1514
1515		need_fence = (bo_gem->reloc_target_info[i].flags &
1516			      DRM_INTEL_RELOC_FENCE);
1517
1518		/* Add the target to the validate list */
1519		drm_intel_add_validate_buffer2(target_bo, need_fence);
1520	}
1521}
1522
1523
1524static void
1525drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
1526{
1527	int i;
1528
1529	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1530		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1531		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1532
1533		/* Update the buffer offset */
1534		if (bufmgr_gem->exec_objects[i].offset != bo->offset) {
1535			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1536			    bo_gem->gem_handle, bo_gem->name, bo->offset,
1537			    (unsigned long long)bufmgr_gem->exec_objects[i].
1538			    offset);
1539			bo->offset = bufmgr_gem->exec_objects[i].offset;
1540		}
1541	}
1542}
1543
1544static void
1545drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
1546{
1547	int i;
1548
1549	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1550		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1551		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1552
1553		/* Update the buffer offset */
1554		if (bufmgr_gem->exec2_objects[i].offset != bo->offset) {
1555			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
1556			    bo_gem->gem_handle, bo_gem->name, bo->offset,
1557			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
1558			bo->offset = bufmgr_gem->exec2_objects[i].offset;
1559		}
1560	}
1561}
1562
1563static int
1564drm_intel_gem_bo_exec(drm_intel_bo *bo, int used,
1565		      drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
1566{
1567	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1568	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1569	struct drm_i915_gem_execbuffer execbuf;
1570	int ret, i;
1571
1572	if (bo_gem->has_error)
1573		return -ENOMEM;
1574
1575	pthread_mutex_lock(&bufmgr_gem->lock);
1576	/* Update indices and set up the validate list. */
1577	drm_intel_gem_bo_process_reloc(bo);
1578
1579	/* Add the batch buffer to the validation list.  There are no
1580	 * relocations pointing to it.
1581	 */
1582	drm_intel_add_validate_buffer(bo);
1583
1584	execbuf.buffers_ptr = (uintptr_t) bufmgr_gem->exec_objects;
1585	execbuf.buffer_count = bufmgr_gem->exec_count;
1586	execbuf.batch_start_offset = 0;
1587	execbuf.batch_len = used;
1588	execbuf.cliprects_ptr = (uintptr_t) cliprects;
1589	execbuf.num_cliprects = num_cliprects;
1590	execbuf.DR1 = 0;
1591	execbuf.DR4 = DR4;
1592
1593	ret = drmIoctl(bufmgr_gem->fd,
1594		       DRM_IOCTL_I915_GEM_EXECBUFFER,
1595		       &execbuf);
1596	if (ret != 0) {
1597		ret = -errno;
1598		if (errno == ENOSPC) {
1599			DBG("Execbuffer fails to pin. "
1600			    "Estimate: %u. Actual: %u. Available: %u\n",
1601			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1602							       bufmgr_gem->
1603							       exec_count),
1604			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1605							      bufmgr_gem->
1606							      exec_count),
1607			    (unsigned int)bufmgr_gem->gtt_size);
1608		}
1609	}
1610	drm_intel_update_buffer_offsets(bufmgr_gem);
1611
1612	if (bufmgr_gem->bufmgr.debug)
1613		drm_intel_gem_dump_validation_list(bufmgr_gem);
1614
1615	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1616		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1617		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1618
1619		/* Disconnect the buffer from the validate list */
1620		bo_gem->validate_index = -1;
1621		bufmgr_gem->exec_bos[i] = NULL;
1622	}
1623	bufmgr_gem->exec_count = 0;
1624	pthread_mutex_unlock(&bufmgr_gem->lock);
1625
1626	return ret;
1627}
1628
1629static int
1630drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
1631			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
1632			unsigned int flags)
1633{
1634	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1635	struct drm_i915_gem_execbuffer2 execbuf;
1636	int ret, i;
1637
1638	switch (flags & 0x7) {
1639	default:
1640		return -EINVAL;
1641	case I915_EXEC_BLT:
1642		if (!bufmgr_gem->has_blt)
1643			return -EINVAL;
1644		break;
1645	case I915_EXEC_BSD:
1646		if (!bufmgr_gem->has_bsd)
1647			return -EINVAL;
1648		break;
1649	case I915_EXEC_RENDER:
1650	case I915_EXEC_DEFAULT:
1651		break;
1652	}
1653
1654	pthread_mutex_lock(&bufmgr_gem->lock);
1655	/* Update indices and set up the validate list. */
1656	drm_intel_gem_bo_process_reloc2(bo);
1657
1658	/* Add the batch buffer to the validation list.  There are no relocations
1659	 * pointing to it.
1660	 */
1661	drm_intel_add_validate_buffer2(bo, 0);
1662
1663	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
1664	execbuf.buffer_count = bufmgr_gem->exec_count;
1665	execbuf.batch_start_offset = 0;
1666	execbuf.batch_len = used;
1667	execbuf.cliprects_ptr = (uintptr_t)cliprects;
1668	execbuf.num_cliprects = num_cliprects;
1669	execbuf.DR1 = 0;
1670	execbuf.DR4 = DR4;
1671	execbuf.flags = flags;
1672	execbuf.rsvd1 = 0;
1673	execbuf.rsvd2 = 0;
1674
1675	ret = drmIoctl(bufmgr_gem->fd,
1676		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
1677		       &execbuf);
1678	if (ret != 0) {
1679		ret = -errno;
1680		if (ret == -ENOSPC) {
1681			DBG("Execbuffer fails to pin. "
1682			    "Estimate: %u. Actual: %u. Available: %u\n",
1683			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
1684							       bufmgr_gem->exec_count),
1685			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
1686							      bufmgr_gem->exec_count),
1687			    (unsigned int) bufmgr_gem->gtt_size);
1688		}
1689	}
1690	drm_intel_update_buffer_offsets2(bufmgr_gem);
1691
1692	if (bufmgr_gem->bufmgr.debug)
1693		drm_intel_gem_dump_validation_list(bufmgr_gem);
1694
1695	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1696		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1697		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1698
1699		/* Disconnect the buffer from the validate list */
1700		bo_gem->validate_index = -1;
1701		bufmgr_gem->exec_bos[i] = NULL;
1702	}
1703	bufmgr_gem->exec_count = 0;
1704	pthread_mutex_unlock(&bufmgr_gem->lock);
1705
1706	return ret;
1707}
1708
1709static int
1710drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
1711		       drm_clip_rect_t *cliprects, int num_cliprects,
1712		       int DR4)
1713{
1714	return drm_intel_gem_bo_mrb_exec2(bo, used,
1715					cliprects, num_cliprects, DR4,
1716					I915_EXEC_RENDER);
1717}
1718
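/*
 * Illustrative submission (hypothetical caller; batch_bo and used_bytes
 * are placeholders): once the batchbuffer has its commands and relocations
 * emitted, a single call validates every referenced BO and executes the
 * batch on the render ring:
 *
 *	int ret = drm_intel_bo_exec(batch_bo, used_bytes, NULL, 0, 0);
 *
 * The multi-ring path above (drm_intel_gem_bo_mrb_exec2) additionally
 * takes a flags word such as I915_EXEC_BLT or I915_EXEC_BSD, and rejects
 * rings the kernel has not advertised.
 */
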
1719static int
1720drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
1721{
1722	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1723	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1724	struct drm_i915_gem_pin pin;
1725	int ret;
1726
1727	memset(&pin, 0, sizeof(pin));
1728	pin.handle = bo_gem->gem_handle;
1729	pin.alignment = alignment;
1730
1731	ret = drmIoctl(bufmgr_gem->fd,
1732		       DRM_IOCTL_I915_GEM_PIN,
1733		       &pin);
1734	if (ret != 0)
1735		return -errno;
1736
1737	bo->offset = pin.offset;
1738	return 0;
1739}
1740
1741static int
1742drm_intel_gem_bo_unpin(drm_intel_bo *bo)
1743{
1744	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1745	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1746	struct drm_i915_gem_unpin unpin;
1747	int ret;
1748
1749	memset(&unpin, 0, sizeof(unpin));
1750	unpin.handle = bo_gem->gem_handle;
1751
1752	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
1753	if (ret != 0)
1754		return -errno;
1755
1756	return 0;
1757}
1758
1759static int
1760drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
1761				     uint32_t tiling_mode,
1762				     uint32_t stride)
1763{
1764	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1765	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1766	struct drm_i915_gem_set_tiling set_tiling;
1767	int ret;
1768
1769	if (bo_gem->global_name == 0 &&
1770	    tiling_mode == bo_gem->tiling_mode &&
1771	    stride == bo_gem->stride)
1772		return 0;
1773
1774	memset(&set_tiling, 0, sizeof(set_tiling));
1775	do {
1776		/* set_tiling is slightly broken and overwrites the
1777		 * input on the error path, so we have to open-code
1778		 * drmIoctl.
1779		 */
1780		set_tiling.handle = bo_gem->gem_handle;
1781		set_tiling.tiling_mode = tiling_mode;
1782		set_tiling.stride = stride;
1783
1784		ret = ioctl(bufmgr_gem->fd,
1785			    DRM_IOCTL_I915_GEM_SET_TILING,
1786			    &set_tiling);
1787	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
1788	if (ret == -1)
1789		return -errno;
1790
1791	bo_gem->tiling_mode = set_tiling.tiling_mode;
1792	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
1793	bo_gem->stride = set_tiling.stride;
1794	return 0;
1795}
1796
1797static int
1798drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
1799			    uint32_t stride)
1800{
1801	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1802	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1803	int ret;
1804
1805	/* Linear buffers have no stride. By ensuring that we only ever use
1806	 * stride 0 with linear buffers, we simplify our code.
1807	 */
1808	if (*tiling_mode == I915_TILING_NONE)
1809		stride = 0;
1810
1811	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
1812	if (ret == 0)
1813		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
1814
1815	*tiling_mode = bo_gem->tiling_mode;
1816	return ret;
1817}
1818
1819static int
1820drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
1821			    uint32_t * swizzle_mode)
1822{
1823	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1824
1825	*tiling_mode = bo_gem->tiling_mode;
1826	*swizzle_mode = bo_gem->swizzle_mode;
1827	return 0;
1828}
1829
1830static int
1831drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
1832{
1833	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1834	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1835	struct drm_gem_flink flink;
1836	int ret;
1837
1838	if (!bo_gem->global_name) {
1839		memset(&flink, 0, sizeof(flink));
1840		flink.handle = bo_gem->gem_handle;
1841
1842		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
1843		if (ret != 0)
1844			return -errno;
1845		bo_gem->global_name = flink.name;
1846		bo_gem->reusable = false;
1847
1848		DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
1849	}
1850
1851	*name = bo_gem->global_name;
1852	return 0;
1853}
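/*
 * Illustrative sketch of sharing a buffer through the flink name produced
 * above: the exporting process hands the 32-bit name to another process,
 * which reopens it with drm_intel_bo_gem_create_from_name() on its own
 * bufmgr for the same device.  The transport helper send_name_to_peer()
 * is hypothetical.
 *
 *	uint32_t name;
 *	if (drm_intel_bo_flink(bo, &name) == 0)
 *		send_name_to_peer(name);
 *
 *	// in the importing process:
 *	drm_intel_bo *shared =
 *		drm_intel_bo_gem_create_from_name(bufmgr, "shared", name);
 */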
1854
1855/**
1856 * Enables unlimited caching of buffer objects for reuse.
1857 *
1858 * This is potentially very memory expensive, as the cache at each bucket
1859 * size is only bounded by how many buffers of that size we've managed to have
1860 * in flight at once.
1861 */
1862void
1863drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
1864{
1865	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1866
1867	bufmgr_gem->bo_reuse = true;
1868}
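/*
 * Illustrative sketch: reuse is opt-in, so clients typically flip it on
 * immediately after creating the buffer manager and before the first
 * allocation.  fd is a hypothetical, already-open DRM device and 16384 a
 * batch size chosen by the caller.
 *
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16384);
 *	if (bufmgr)
 *		drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 */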
1869
1870/**
1871 * Enable use of fenced reloc type.
1872 *
1873 * New code should enable this to avoid unnecessary fence register
1874 * allocation.  If this option is not enabled, every reloc will have a
1875 * fence register allocated for it.
1876 */
1877void
1878drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
1879{
1880	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
1881
1882	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
1883		bufmgr_gem->fenced_relocs = true;
1884}
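/*
 * Illustrative sketch: as the check above shows, fenced relocs are only
 * honoured when the execbuffer2 path is in use, so the call is a silent
 * no-op on the old execbuffer path.  bufmgr is assumed to come from
 * drm_intel_bufmgr_gem_init().
 *
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *	drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
 */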
1885
1886/**
1887 * Return the additional aperture space required by the tree of buffer objects
1888 * rooted at bo.
1889 */
1890static int
1891drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
1892{
1893	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1894	int i;
1895	int total = 0;
1896
1897	if (bo == NULL || bo_gem->included_in_check_aperture)
1898		return 0;
1899
1900	total += bo->size;
1901	bo_gem->included_in_check_aperture = true;
1902
1903	for (i = 0; i < bo_gem->reloc_count; i++)
1904		total +=
1905		    drm_intel_gem_bo_get_aperture_space(bo_gem->
1906							reloc_target_info[i].bo);
1907
1908	return total;
1909}
1910
1911/**
1912 * Count the number of buffers in this list that need a fence reg
1913 *
1914 * If the count is greater than the number of available regs, we'll have
1915 * to ask the caller to resubmit a batch with fewer tiled buffers.
1916 *
1917 * This function over-counts if the same buffer is used multiple times.
1918 */
1919static unsigned int
1920drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
1921{
1922	int i;
1923	unsigned int total = 0;
1924
1925	for (i = 0; i < count; i++) {
1926		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
1927
1928		if (bo_gem == NULL)
1929			continue;
1930
1931		total += bo_gem->reloc_tree_fences;
1932	}
1933	return total;
1934}
1935
1936/**
1937 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
1938 * for the next drm_intel_bufmgr_check_aperture_space() call.
1939 */
1940static void
1941drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
1942{
1943	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1944	int i;
1945
1946	if (bo == NULL || !bo_gem->included_in_check_aperture)
1947		return;
1948
1949	bo_gem->included_in_check_aperture = false;
1950
1951	for (i = 0; i < bo_gem->reloc_count; i++)
1952		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
1953							   reloc_target_info[i].bo);
1954}
1955
1956/**
1957 * Return a conservative estimate for the amount of aperture required
1958 * for a collection of buffers. This may double-count some buffers.
1959 */
1960static unsigned int
1961drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
1962{
1963	int i;
1964	unsigned int total = 0;
1965
1966	for (i = 0; i < count; i++) {
1967		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
1968		if (bo_gem != NULL)
1969			total += bo_gem->reloc_tree_size;
1970	}
1971	return total;
1972}
1973
1974/**
1975 * Return the amount of aperture needed for a collection of buffers.
1976 * This avoids double counting any buffers, at the cost of looking
1977 * at every buffer in the set.
1978 */
1979static unsigned int
1980drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
1981{
1982	int i;
1983	unsigned int total = 0;
1984
1985	for (i = 0; i < count; i++) {
1986		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
1987		/* For the first buffer object in the array, we get an
1988		 * accurate count back for its reloc_tree size (since nothing
1989		 * had been flagged as being counted yet).  We can save that
1990		 * value out as a more conservative reloc_tree_size that
1991		 * avoids double-counting target buffers.  Since the first
1992		 * buffer happens to usually be the batch buffer in our
1993		 * callers, this can pull us back from doing the tree
1994		 * walk on every new batch emit.
1995		 */
1996		if (i == 0) {
1997			drm_intel_bo_gem *bo_gem =
1998			    (drm_intel_bo_gem *) bo_array[i];
1999			bo_gem->reloc_tree_size = total;
2000		}
2001	}
2002
2003	for (i = 0; i < count; i++)
2004		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2005	return total;
2006}
2007
2008/**
2009 * Return -1 if the batchbuffer should be flushed before attempting to
2010 * emit rendering referencing the buffers pointed to by bo_array.
2011 *
2012 * This is required because if we try to emit a batchbuffer with relocations
2013 * to a tree of buffers that won't simultaneously fit in the aperture,
2014 * the rendering will return an error at a point where the software is not
2015 * prepared to recover from it.
2016 *
2017 * However, we also want to emit the batchbuffer significantly before we reach
2018 * the limit, as a series of batchbuffers each of which references buffers
2019 * covering almost all of the aperture means that at each emit we end up
2020 * waiting to evict a buffer from the last rendering, and we get synchronous
2021 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
2022 * get better parallelism.
2023 */
2024static int
2025drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2026{
2027	drm_intel_bufmgr_gem *bufmgr_gem =
2028	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2029	unsigned int total = 0;
2030	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2031	int total_fences;
2032
2033	/* Check for fence reg constraints if necessary */
2034	if (bufmgr_gem->available_fences) {
2035		total_fences = drm_intel_gem_total_fences(bo_array, count);
2036		if (total_fences > bufmgr_gem->available_fences)
2037			return -ENOSPC;
2038	}
2039
2040	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2041
2042	if (total > threshold)
2043		total = drm_intel_gem_compute_batch_space(bo_array, count);
2044
2045	if (total > threshold) {
2046		DBG("check_space: overflowed available aperture, "
2047		    "%dkb vs %dkb\n",
2048		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
2049		return -ENOSPC;
2050	} else {
2051		DBG("drm_check_space: total %dkb vs bufmgr %dkb\n", total / 1024,
2052		    (int)bufmgr_gem->gtt_size / 1024);
2053		return 0;
2054	}
2055}
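/*
 * Illustrative sketch of the caller-side protocol described above, via the
 * public wrapper drm_intel_bufmgr_check_aperture_space(): before
 * referencing another buffer from the batch, ask whether the accumulated
 * set still fits and flush first if it does not.  bo_array, count and
 * flush_batch() are hypothetical caller state.
 *
 *	if (drm_intel_bufmgr_check_aperture_space(bo_array, count) != 0) {
 *		flush_batch();
 *		// re-emit the pending state into a fresh batch
 *	}
 */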
2056
2057/*
2058 * Disable buffer reuse for objects which are shared with the kernel
2059 * as scanout buffers
2060 */
2061static int
2062drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
2063{
2064	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2065
2066	bo_gem->reusable = false;
2067	return 0;
2068}
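/*
 * Illustrative sketch: a buffer handed to the kernel as a scanout (for
 * example after drmModeAddFB on its flink name) must not return to the
 * reuse cache, so callers mark it through the public wrapper.  scanout_bo
 * is a hypothetical buffer owned by the caller.
 *
 *	drm_intel_bo_disable_reuse(scanout_bo);
 */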
2069
2070static int
2071drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2072{
2073	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2074
2075	return bo_gem->reusable;
2076}
2077
2078static int
2079_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2080{
2081	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2082	int i;
2083
2084	for (i = 0; i < bo_gem->reloc_count; i++) {
2085		if (bo_gem->reloc_target_info[i].bo == target_bo)
2086			return 1;
2087		if (bo == bo_gem->reloc_target_info[i].bo)
2088			continue;
2089		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2090						target_bo))
2091			return 1;
2092	}
2093
2094	return 0;
2095}
2096
2097/** Return true if target_bo is referenced by bo's relocation tree. */
2098static int
2099drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2100{
2101	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2102
2103	if (bo == NULL || target_bo == NULL)
2104		return 0;
2105	if (target_bo_gem->used_as_reloc_target)
2106		return _drm_intel_gem_bo_references(bo, target_bo);
2107	return 0;
2108}
2109
2110static void
2111add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
2112{
2113	unsigned int i = bufmgr_gem->num_buckets;
2114
2115	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
2116
2117	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
2118	bufmgr_gem->cache_bucket[i].size = size;
2119	bufmgr_gem->num_buckets++;
2120}
2121
2122static void
2123init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
2124{
2125	unsigned long size, cache_max_size = 64 * 1024 * 1024;
2126
2127	/* OK, so power of two buckets was too wasteful of memory.
2128	 * Give 3 other sizes between each power of two, to hopefully
2129	 * cover things accurately enough.  (The alternative is
2130	 * probably to just go for exact matching of sizes, and assume
2131	 * that for things like composited window resize the tiled
2132	 * width/height alignment and rounding of sizes to pages will
2133	 * get us useful cache hit rates anyway)
2134	 */
2135	add_bucket(bufmgr_gem, 4096);
2136	add_bucket(bufmgr_gem, 4096 * 2);
2137	add_bucket(bufmgr_gem, 4096 * 3);
2138
2139	/* Initialize the linked lists for BO reuse cache. */
2140	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2141		add_bucket(bufmgr_gem, size);
2142
2143		add_bucket(bufmgr_gem, size + size * 1 / 4);
2144		add_bucket(bufmgr_gem, size + size * 2 / 4);
2145		add_bucket(bufmgr_gem, size + size * 3 / 4);
2146	}
2147}
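/*
 * Worked example of the sizing scheme above: the buckets come out as
 * 4KB, 8KB, 12KB, then 16KB, 20KB, 24KB, 28KB, 32KB, 40KB, 48KB, 56KB,
 * 64KB, ... that is, every power of two plus three evenly spaced
 * intermediate sizes, up to the 64MB cap: 55 buckets in total, which fits
 * in the 14 * 4 entries reserved in cache_bucket[].
 */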
2148
2149/**
2150 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
2151 * and manage buffer objects.
2152 *
2153 * \param fd File descriptor of the opened DRM device.
2154 */
2155drm_intel_bufmgr *
2156drm_intel_bufmgr_gem_init(int fd, int batch_size)
2157{
2158	drm_intel_bufmgr_gem *bufmgr_gem;
2159	struct drm_i915_gem_get_aperture aperture;
2160	drm_i915_getparam_t gp;
2161	int ret, tmp;
2162	bool exec2 = false;
2163
2164	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
2165	if (bufmgr_gem == NULL)
2166		return NULL;
2167
2168	bufmgr_gem->fd = fd;
2169
2170	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
2171		free(bufmgr_gem);
2172		return NULL;
2173	}
2174
2175	ret = drmIoctl(bufmgr_gem->fd,
2176		       DRM_IOCTL_I915_GEM_GET_APERTURE,
2177		       &aperture);
2178
2179	if (ret == 0)
2180		bufmgr_gem->gtt_size = aperture.aper_available_size;
2181	else {
2182		fprintf(stderr, "DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
2183			strerror(errno));
2184		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
2185		fprintf(stderr, "Assuming %dkB available aperture size.\n"
2186			"May lead to reduced performance or incorrect "
2187			"rendering.\n",
2188			(int)bufmgr_gem->gtt_size / 1024);
2189	}
2190
2191	gp.param = I915_PARAM_CHIPSET_ID;
2192	gp.value = &bufmgr_gem->pci_device;
2193	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2194	if (ret) {
2195		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
2196		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
2197	}
2198
2199	if (IS_GEN2(bufmgr_gem))
2200		bufmgr_gem->gen = 2;
2201	else if (IS_GEN3(bufmgr_gem))
2202		bufmgr_gem->gen = 3;
2203	else if (IS_GEN4(bufmgr_gem))
2204		bufmgr_gem->gen = 4;
2205	else
2206		bufmgr_gem->gen = 6;
2207
2208	gp.value = &tmp;
2209
2210	gp.param = I915_PARAM_HAS_EXECBUF2;
2211	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2212	if (!ret)
2213		exec2 = true;
2214
2215	gp.param = I915_PARAM_HAS_BSD;
2216	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2217	bufmgr_gem->has_bsd = ret == 0;
2218
2219	gp.param = I915_PARAM_HAS_BLT;
2220	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2221	bufmgr_gem->has_blt = ret == 0;
2222
2223	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
2224	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2225	bufmgr_gem->has_relaxed_fencing = ret == 0;
2226
2227	if (bufmgr_gem->gen < 4) {
2228		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
2229		gp.value = &bufmgr_gem->available_fences;
2230		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2231		if (ret) {
2232			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
2233				errno);
2234			fprintf(stderr, "param: %d, val: %d\n", gp.param,
2235				*gp.value);
2236			bufmgr_gem->available_fences = 0;
2237		} else {
2238			/* XXX The kernel reports the total number of fences,
2239			 * including any that may be pinned.
2240			 *
2241			 * We presume that there will be at least one pinned
2242			 * fence for the scanout buffer, but there may be more
2243			 * than one scanout and the user may be manually
2244			 * pinning buffers. Let's move to execbuffer2 and
2245			 * thereby forget the insanity of using fences...
2246			 */
2247			bufmgr_gem->available_fences -= 2;
2248			if (bufmgr_gem->available_fences < 0)
2249				bufmgr_gem->available_fences = 0;
2250		}
2251	}
2252
2253	/* Let's go with one relocation per every 2 dwords (but round down a bit
2254	 * since a power of two will mean an extra page allocation for the reloc
2255	 * buffer).
2256	 *
2257	 * Every 4 was too few for the blender benchmark.
2258	 */
2259	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
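	/* Worked example: a caller passing batch_size == 16384 gets
	 * 16384 / 4 / 2 - 2 = 2046 relocations per buffer, just under the
	 * power of two 2048 for the reason given above.
	 */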
2260
2261	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
2262	bufmgr_gem->bufmgr.bo_alloc_for_render =
2263	    drm_intel_gem_bo_alloc_for_render;
2264	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
2265	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
2266	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
2267	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
2268	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
2269	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
2270	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
2271	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
2272	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
2273	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
2274	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
2275	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
2276	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
2277	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
2278	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
2279	/* Use execbuffer2 if the kernel supports it */
2280	if (exec2) {
2281		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
2282		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
2283	} else
2284		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
2285	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
2286	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
2287	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
2288	bufmgr_gem->bufmgr.debug = 0;
2289	bufmgr_gem->bufmgr.check_aperture_space =
2290	    drm_intel_gem_check_aperture_space;
2291	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
2292	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
2293	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
2294	    drm_intel_gem_get_pipe_from_crtc_id;
2295	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
2296
2297	DRMINITLISTHEAD(&bufmgr_gem->named);
2298	init_cache_buckets(bufmgr_gem);
2299
2300	return &bufmgr_gem->bufmgr;
2301}
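/*
 * Illustrative sketch of bringing the buffer manager up and tearing it
 * down again.  The device path, 16KB batch size and "scratch" buffer are
 * hypothetical caller choices; error handling is elided for brevity.
 *
 *	int fd = open("/dev/dri/card0", O_RDWR);
 *	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16384);
 *	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
 *
 *	drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "scratch", 4096, 4096);
 *	drm_intel_bo_map(bo, 1);
 *	memset(bo->virtual, 0, 4096);
 *	drm_intel_bo_unmap(bo);
 *	drm_intel_bo_unreference(bo);
 *
 *	drm_intel_bufmgr_destroy(bufmgr);
 *	close(fd);
 */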
2302