1/**************************************************************************
2 *
3 * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
/* Originally a fake version of the buffer manager so that we could
 * prototype the changes in a driver fairly quickly; it has since been
 * fleshed out into a fully functional interim solution.
 *
 * Basically it wraps the old-style memory management in the new
 * programming interface, but is more expressive and avoids many of
 * the bugs in the old texture manager.
 */
36
37#ifdef HAVE_CONFIG_H
38#include "config.h"
39#endif
40
41#include <stdlib.h>
42#include <string.h>
43#include <assert.h>
44#include <errno.h>
45#include <xf86drm.h>
46#include <pthread.h>
47#include "intel_bufmgr.h"
48#include "intel_bufmgr_priv.h"
49#include "drm.h"
50#include "i915_drm.h"
51#include "mm.h"
52#include "libdrm.h"
53#include "libdrm_lists.h"
54
55/* Support gcc's __FUNCTION__ for people using other compilers */
56#if !defined(__GNUC__) && !defined(__FUNCTION__)
57# define __FUNCTION__ __func__ /* C99 */
58#endif
59
60#define DBG(...) do {					\
61	if (bufmgr_fake->bufmgr.debug)			\
62		drmMsg(__VA_ARGS__);			\
63} while (0)
64
65/* Internal flags:
66 */
67#define BM_NO_BACKING_STORE			0x00000001
68#define BM_NO_FENCE_SUBDATA			0x00000002
69#define BM_PINNED				0x00000004
70
71/* Wrapper around mm.c's mem_block, which understands that you must
72 * wait for fences to expire before memory can be freed.  This is
73 * specific to our use of memcpy for uploads - an upload that was
74 * processed through the command queue wouldn't need to care about
75 * fences.
76 */
77#define MAX_RELOCS 4096
78
79struct fake_buffer_reloc {
80	/** Buffer object that the relocation points at. */
81	drm_intel_bo *target_buf;
82	/** Offset of the relocation entry within reloc_buf. */
83	uint32_t offset;
84	/**
85	 * Cached value of the offset when we last performed this relocation.
86	 */
87	uint32_t last_target_offset;
88	/** Value added to target_buf's offset to get the relocation entry. */
89	uint32_t delta;
90	/** Cache domains the target buffer is read into. */
91	uint32_t read_domains;
92	/** Cache domain the target buffer will have dirty cachelines in. */
93	uint32_t write_domain;
94};
95
96struct block {
97	struct block *next, *prev;
98	struct mem_block *mem;	/* BM_MEM_AGP */
99
100	/**
101	 * Marks that the block is currently in the aperture and has yet to be
102	 * fenced.
103	 */
104	unsigned on_hardware:1;
105	/**
106	 * Marks that the block is currently fenced (being used by rendering)
107	 * and can't be freed until @fence is passed.
108	 */
109	unsigned fenced:1;
110
111	/** Fence cookie for the block. */
112	unsigned fence;		/* Split to read_fence, write_fence */
113
114	drm_intel_bo *bo;
115	void *virtual;
116};
117
118typedef struct _bufmgr_fake {
119	drm_intel_bufmgr bufmgr;
120
121	pthread_mutex_t lock;
122
123	unsigned long low_offset;
124	unsigned long size;
125	void *virtual;
126
127	struct mem_block *heap;
128
129	unsigned buf_nr;	/* for generating ids */
130
131	/**
132	 * List of blocks which are currently in the GART but haven't been
133	 * fenced yet.
134	 */
135	struct block on_hardware;
136	/**
137	 * List of blocks which are in the GART and have an active fence on
138	 * them.
139	 */
140	struct block fenced;
141	/**
142	 * List of blocks which have an expired fence and are ready to be
143	 * evicted.
144	 */
145	struct block lru;
146
147	unsigned int last_fence;
148
149	unsigned fail:1;
150	unsigned need_fence:1;
151	int thrashing;
152
153	/**
154	 * Driver callback to emit a fence, returning the cookie.
155	 *
156	 * This allows the driver to hook in a replacement for the DRM usage in
157	 * bufmgr_fake.
158	 *
159	 * Currently, this also requires that a write flush be emitted before
160	 * emitting the fence, but this should change.
161	 */
162	unsigned int (*fence_emit) (void *private);
163	/** Driver callback to wait for a fence cookie to have passed. */
164	void (*fence_wait) (unsigned int fence, void *private);
165	void *fence_priv;
166
167	/**
168	 * Driver callback to execute a buffer.
169	 *
170	 * This allows the driver to hook in a replacement for the DRM usage in
171	 * bufmgr_fake.
172	 */
173	int (*exec) (drm_intel_bo *bo, unsigned int used, void *priv);
174	void *exec_priv;
175
176	/** Driver-supplied argument to driver callbacks */
177	void *driver_priv;
178	/**
179	 * Pointer to kernel-updated sarea data for the last completed user irq
180	 */
181	volatile int *last_dispatch;
182
183	int fd;
184
185	int debug;
186
187	int performed_rendering;
188} drm_intel_bufmgr_fake;
189
190typedef struct _drm_intel_bo_fake {
191	drm_intel_bo bo;
192
193	unsigned id;		/* debug only */
194	const char *name;
195
196	unsigned dirty:1;
197	/**
	 * Has the card written to this buffer?  If so, we may need to
	 * copy it back.
199	 */
200	unsigned card_dirty:1;
201	unsigned int refcount;
202	/* Flags may consist of any of the DRM_BO flags, plus
	 * BM_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the
204	 * first two driver private flags.
205	 */
206	uint64_t flags;
207	/** Cache domains the target buffer is read into. */
208	uint32_t read_domains;
209	/** Cache domain the target buffer will have dirty cachelines in. */
210	uint32_t write_domain;
211
212	unsigned int alignment;
213	int is_static, validated;
214	unsigned int map_count;
215
216	/** relocation list */
217	struct fake_buffer_reloc *relocs;
218	int nr_relocs;
219	/**
220	 * Total size of the target_bos of this buffer.
221	 *
222	 * Used for estimation in check_aperture.
223	 */
224	unsigned int child_size;
225
226	struct block *block;
227	void *backing_store;
228	void (*invalidate_cb) (drm_intel_bo *bo, void *ptr);
229	void *invalidate_ptr;
230} drm_intel_bo_fake;
231
232static int clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake,
233			unsigned int fence_cookie);
234
235#define MAXFENCE 0x7fffffff
236
237static int
238FENCE_LTE(unsigned a, unsigned b)
239{
240	if (a == b)
241		return 1;
242
243	if (a < b && b - a < (1 << 24))
244		return 1;
245
246	if (a > b && MAXFENCE - a + b < (1 << 24))
247		return 1;
248
249	return 0;
250}
251
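/*
 * Worked example of the wrap-around comparison above (illustrative note,
 * not part of the original code).  Fence cookies live in the 31-bit range
 * capped by MAXFENCE, and two cookies are only considered ordered when
 * they are within 1 << 24 of each other:
 *
 *   FENCE_LTE(0x00000005, 0x00000009) == 1   (b - a == 4)
 *   FENCE_LTE(0x7ffffffd, 0x00000002) == 1   (MAXFENCE - a + b == 4,
 *                                             i.e. b has wrapped past a)
 *   FENCE_LTE(0x00000002, 0x7ffffffd) == 0   (b is ~2^31 ahead, so a is
 *                                             treated as having wrapped
 *                                             ahead of b)
 */
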
252drm_public void
253drm_intel_bufmgr_fake_set_fence_callback(drm_intel_bufmgr *bufmgr,
254					 unsigned int (*emit) (void *priv),
255					 void (*wait) (unsigned int fence,
256						       void *priv),
257					 void *priv)
258{
259	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
260
261	bufmgr_fake->fence_emit = emit;
262	bufmgr_fake->fence_wait = wait;
263	bufmgr_fake->fence_priv = priv;
264}
265
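/*
 * Usage sketch (illustrative, not part of the original code): a driver
 * that manages its own breadcrumb writes can replace the
 * DRM_I915_IRQ_EMIT / DRM_I915_IRQ_WAIT path by hooking the fence
 * callbacks through drm_intel_bufmgr_fake_set_fence_callback().  The
 * struct my_device type and my_emit_breadcrumb()/my_wait_breadcrumb()
 * helpers are hypothetical stand-ins for driver code.
 *
 *   static unsigned int my_fence_emit(void *priv)
 *   {
 *           struct my_device *dev = priv;
 *           return my_emit_breadcrumb(dev);
 *   }
 *
 *   static void my_fence_wait(unsigned int fence, void *priv)
 *   {
 *           struct my_device *dev = priv;
 *           my_wait_breadcrumb(dev, fence);
 *   }
 *
 *   drm_intel_bufmgr_fake_set_fence_callback(bufmgr, my_fence_emit,
 *                                            my_fence_wait, dev);
 */
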
266static unsigned int
267_fence_emit_internal(drm_intel_bufmgr_fake *bufmgr_fake)
268{
269	struct drm_i915_irq_emit ie;
270	int ret, seq = 1;
271
272	if (bufmgr_fake->fence_emit != NULL) {
273		seq = bufmgr_fake->fence_emit(bufmgr_fake->fence_priv);
274		return seq;
275	}
276
277	ie.irq_seq = &seq;
278	ret = drmCommandWriteRead(bufmgr_fake->fd, DRM_I915_IRQ_EMIT,
279				  &ie, sizeof(ie));
280	if (ret) {
281		drmMsg("%s: drm_i915_irq_emit: %d\n", __FUNCTION__, ret);
282		abort();
283	}
284
285	DBG("emit 0x%08x\n", seq);
286	return seq;
287}
288
289static void
290_fence_wait_internal(drm_intel_bufmgr_fake *bufmgr_fake, int seq)
291{
292	struct drm_i915_irq_wait iw;
293	int hw_seq, busy_count = 0;
294	int ret;
295	int kernel_lied;
296
297	if (bufmgr_fake->fence_wait != NULL) {
298		bufmgr_fake->fence_wait(seq, bufmgr_fake->fence_priv);
299		clear_fenced(bufmgr_fake, seq);
300		return;
301	}
302
303	iw.irq_seq = seq;
304
305	DBG("wait 0x%08x\n", iw.irq_seq);
306
307	/* The kernel IRQ_WAIT implementation is all sorts of broken.
308	 * 1) It returns 1 to 0x7fffffff instead of using the full 32-bit
309	 *    unsigned range.
310	 * 2) It returns 0 if hw_seq >= seq, not seq - hw_seq < 0 on the 32-bit
311	 *    signed range.
312	 * 3) It waits if seq < hw_seq, not seq - hw_seq > 0 on the 32-bit
313	 *    signed range.
314	 * 4) It returns -EBUSY in 3 seconds even if the hardware is still
315	 *    successfully chewing through buffers.
316	 *
317	 * Assume that in userland we treat sequence numbers as ints, which
318	 * makes some of the comparisons convenient, since the sequence
	 * numbers are all positive signed integers.
320	 *
321	 * From this we get several cases we need to handle.  Here's a timeline.
322	 * 0x2   0x7                                    0x7ffffff8   0x7ffffffd
323	 *   |    |                                             |    |
324	 * ------------------------------------------------------------
325	 *
326	 * A) Normal wait for hw to catch up
327	 * hw_seq seq
328	 *   |    |
329	 * ------------------------------------------------------------
330	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will wait for hw to
331	 * catch up.
332	 *
333	 * B) Normal wait for a sequence number that's already passed.
334	 * seq    hw_seq
335	 *   |    |
336	 * ------------------------------------------------------------
337	 * seq - hw_seq = -5.  If we call IRQ_WAIT, it returns 0 quickly.
338	 *
339	 * C) Hardware has already wrapped around ahead of us
340	 * hw_seq                                                    seq
341	 *   |                                                       |
342	 * ------------------------------------------------------------
343	 * seq - hw_seq = 0x80000000 - 5.  If we called IRQ_WAIT, it would wait
344	 * for hw_seq >= seq, which may never occur.  Thus, we want to catch
345	 * this in userland and return 0.
346	 *
347	 * D) We've wrapped around ahead of the hardware.
348	 * seq                                                      hw_seq
349	 *   |                                                       |
350	 * ------------------------------------------------------------
351	 * seq - hw_seq = -(0x80000000 - 5).  If we called IRQ_WAIT, it would
352	 * return 0 quickly because hw_seq >= seq, even though the hardware
353	 * isn't caught up. Thus, we need to catch this early return in
354	 * userland and bother the kernel until the hardware really does
355	 * catch up.
356	 *
357	 * E) Hardware might wrap after we test in userland.
358	 *                                                  hw_seq  seq
359	 *                                                      |    |
360	 * ------------------------------------------------------------
361	 * seq - hw_seq = 5.  If we call IRQ_WAIT, it will likely see seq >=
362	 * hw_seq and wait.  However, suppose hw_seq wraps before we make it
363	 * into the kernel.  The kernel sees hw_seq >= seq and waits for 3
364	 * seconds then returns -EBUSY.  This is case C).  We should catch
365	 * this and then return successfully.
366	 *
367	 * F) Hardware might take a long time on a buffer.
368	 * hw_seq seq
369	 *   |    |
370	 * -------------------------------------------------------------------
371	 * seq - hw_seq = 5.  If we call IRQ_WAIT, if sequence 2 through 5
372	 * take too long, it will return -EBUSY.  Batchbuffers in the
373	 * gltestperf demo were seen to take up to 7 seconds.  We should
374	 * catch early -EBUSY return and keep trying.
375	 */
376
377	do {
378		/* Keep a copy of last_dispatch so that if the wait -EBUSYs
379		 * because the hardware didn't catch up in 3 seconds, we can
380		 * see if it at least made progress and retry.
381		 */
382		hw_seq = *bufmgr_fake->last_dispatch;
383
384		/* Catch case C */
385		if (seq - hw_seq > 0x40000000)
386			return;
387
388		ret = drmCommandWrite(bufmgr_fake->fd, DRM_I915_IRQ_WAIT,
389				      &iw, sizeof(iw));
390		/* Catch case D */
391		kernel_lied = (ret == 0) && (seq - *bufmgr_fake->last_dispatch <
392					     -0x40000000);
393
394		/* Catch case E */
395		if (ret == -EBUSY
396		    && (seq - *bufmgr_fake->last_dispatch > 0x40000000))
397			ret = 0;
398
399		/* Catch case F: Allow up to 15 seconds chewing on one buffer. */
400		if ((ret == -EBUSY) && (hw_seq != *bufmgr_fake->last_dispatch))
401			busy_count = 0;
402		else
403			busy_count++;
404	} while (kernel_lied || ret == -EAGAIN || ret == -EINTR ||
405		 (ret == -EBUSY && busy_count < 5));
406
407	if (ret != 0) {
408		drmMsg("%s:%d: Error waiting for fence: %s.\n", __FILE__,
409		       __LINE__, strerror(-ret));
410		abort();
411	}
412	clear_fenced(bufmgr_fake, seq);
413}
414
415static int
416_fence_test(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
417{
418	/* Slight problem with wrap-around:
419	 */
420	return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence);
421}
422
423/**
424 * Allocate a memory manager block for the buffer.
425 */
426static int
427alloc_block(drm_intel_bo *bo)
428{
429	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
430	drm_intel_bufmgr_fake *bufmgr_fake =
431	    (drm_intel_bufmgr_fake *) bo->bufmgr;
432	struct block *block = (struct block *)calloc(sizeof *block, 1);
433	unsigned int align_log2 = ffs(bo_fake->alignment) - 1;
434	unsigned int sz;
435
	if (!block)
		return 0;
438
439	sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1);
440
441	block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0);
442	if (!block->mem) {
443		free(block);
444		return 0;
445	}
446
447	DRMINITLISTHEAD(block);
448
449	/* Insert at head or at tail??? */
450	DRMLISTADDTAIL(block, &bufmgr_fake->lru);
451
452	block->virtual = (uint8_t *) bufmgr_fake->virtual +
453	    block->mem->ofs - bufmgr_fake->low_offset;
454	block->bo = bo;
455
456	bo_fake->block = block;
457
458	return 1;
459}
460
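/*
 * Worked example of the size round-up in alloc_block() (illustrative
 * note, not part of the original code): with bo->size == 0x1234 and
 * bo_fake->alignment == 0x1000,
 *
 *   sz = (0x1234 + 0xfff) & ~0xfff = 0x2000
 *
 * and align_log2 == ffs(0x1000) - 1 == 12, so mmAllocMem() returns a
 * block whose offset is 4096-byte aligned and whose size covers the
 * rounded-up allocation.
 */
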
461/* Release the card storage associated with buf:
462 */
463static void
464free_block(drm_intel_bufmgr_fake *bufmgr_fake, struct block *block,
465	   int skip_dirty_copy)
466{
	drm_intel_bo_fake *bo_fake;

	if (!block)
		return;

	DBG("free block %p %08x %d %d\n", block, block->mem->ofs,
	    block->on_hardware, block->fenced);
473
474	bo_fake = (drm_intel_bo_fake *) block->bo;
475
476	if (bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE))
477		skip_dirty_copy = 1;
478
479	if (!skip_dirty_copy && (bo_fake->card_dirty == 1)) {
480		memcpy(bo_fake->backing_store, block->virtual, block->bo->size);
481		bo_fake->card_dirty = 0;
482		bo_fake->dirty = 1;
483	}
484
485	if (block->on_hardware) {
486		block->bo = NULL;
487	} else if (block->fenced) {
488		block->bo = NULL;
489	} else {
490		DBG("    - free immediately\n");
491		DRMLISTDEL(block);
492
493		mmFreeMem(block->mem);
494		free(block);
495	}
496}
497
498static void
499alloc_backing_store(drm_intel_bo *bo)
500{
501	drm_intel_bufmgr_fake *bufmgr_fake =
502	    (drm_intel_bufmgr_fake *) bo->bufmgr;
503	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
504	assert(!bo_fake->backing_store);
505	assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
506
507	bo_fake->backing_store = malloc(bo->size);
508
509	DBG("alloc_backing - buf %d %p %lu\n", bo_fake->id,
510	    bo_fake->backing_store, bo->size);
511	assert(bo_fake->backing_store);
512}
513
514static void
515free_backing_store(drm_intel_bo *bo)
516{
517	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
518
519	if (bo_fake->backing_store) {
520		assert(!(bo_fake->flags & (BM_PINNED | BM_NO_BACKING_STORE)));
521		free(bo_fake->backing_store);
522		bo_fake->backing_store = NULL;
523	}
524}
525
526static void
527set_dirty(drm_intel_bo *bo)
528{
529	drm_intel_bufmgr_fake *bufmgr_fake =
530	    (drm_intel_bufmgr_fake *) bo->bufmgr;
531	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
532
533	if (bo_fake->flags & BM_NO_BACKING_STORE
534	    && bo_fake->invalidate_cb != NULL)
535		bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr);
536
537	assert(!(bo_fake->flags & BM_PINNED));
538
539	DBG("set_dirty - buf %d\n", bo_fake->id);
540	bo_fake->dirty = 1;
541}
542
543static int
544evict_lru(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int max_fence)
545{
546	struct block *block, *tmp;
547
548	DBG("%s\n", __FUNCTION__);
549
550	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
551		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
552
553		if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
554			continue;
555
556		if (block->fence && max_fence && !FENCE_LTE(block->fence,
557							    max_fence))
558			return 0;
559
560		set_dirty(&bo_fake->bo);
561		bo_fake->block = NULL;
562
563		free_block(bufmgr_fake, block, 0);
564		return 1;
565	}
566
567	return 0;
568}
569
570static int
571evict_mru(drm_intel_bufmgr_fake *bufmgr_fake)
572{
573	struct block *block, *tmp;
574
575	DBG("%s\n", __FUNCTION__);
576
577	DRMLISTFOREACHSAFEREVERSE(block, tmp, &bufmgr_fake->lru) {
578		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
579
580		if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA))
581			continue;
582
583		set_dirty(&bo_fake->bo);
584		bo_fake->block = NULL;
585
586		free_block(bufmgr_fake, block, 0);
587		return 1;
588	}
589
590	return 0;
591}
592
593/**
594 * Removes all objects from the fenced list older than the given fence.
595 */
596static int
597clear_fenced(drm_intel_bufmgr_fake *bufmgr_fake, unsigned int fence_cookie)
598{
599	struct block *block, *tmp;
600	int ret = 0;
601
602	bufmgr_fake->last_fence = fence_cookie;
603	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->fenced) {
604		assert(block->fenced);
605
606		if (_fence_test(bufmgr_fake, block->fence)) {
607
608			block->fenced = 0;
609
610			if (!block->bo) {
611				DBG("delayed free: offset %x sz %x\n",
612				    block->mem->ofs, block->mem->size);
613				DRMLISTDEL(block);
614				mmFreeMem(block->mem);
615				free(block);
616			} else {
617				DBG("return to lru: offset %x sz %x\n",
618				    block->mem->ofs, block->mem->size);
619				DRMLISTDEL(block);
620				DRMLISTADDTAIL(block, &bufmgr_fake->lru);
621			}
622
623			ret = 1;
624		} else {
625			/* Blocks are ordered by fence, so if one fails, all
626			 * from here will fail also:
627			 */
628			DBG("fence not passed: offset %x sz %x %d %d \n",
629			    block->mem->ofs, block->mem->size, block->fence,
630			    bufmgr_fake->last_fence);
631			break;
632		}
633	}
634
635	DBG("%s: %d\n", __FUNCTION__, ret);
636	return ret;
637}
638
639static void
640fence_blocks(drm_intel_bufmgr_fake *bufmgr_fake, unsigned fence)
641{
642	struct block *block, *tmp;
643
644	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
645		DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n",
646		    block, block->mem->size, block->mem->ofs, block->bo, fence);
647		block->fence = fence;
648
649		block->on_hardware = 0;
650		block->fenced = 1;
651
652		/* Move to tail of pending list here
653		 */
654		DRMLISTDEL(block);
655		DRMLISTADDTAIL(block, &bufmgr_fake->fenced);
656	}
657
658	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
659}
660
661static int
662evict_and_alloc_block(drm_intel_bo *bo)
663{
664	drm_intel_bufmgr_fake *bufmgr_fake =
665	    (drm_intel_bufmgr_fake *) bo->bufmgr;
666	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
667
668	assert(bo_fake->block == NULL);
669
670	/* Search for already free memory:
671	 */
672	if (alloc_block(bo))
673		return 1;
674
675	/* If we're not thrashing, allow lru eviction to dig deeper into
676	 * recently used textures.  We'll probably be thrashing soon:
677	 */
678	if (!bufmgr_fake->thrashing) {
679		while (evict_lru(bufmgr_fake, 0))
680			if (alloc_block(bo))
681				return 1;
682	}
683
684	/* Keep thrashing counter alive?
685	 */
686	if (bufmgr_fake->thrashing)
687		bufmgr_fake->thrashing = 20;
688
689	/* Wait on any already pending fences - here we are waiting for any
690	 * freed memory that has been submitted to hardware and fenced to
691	 * become available:
692	 */
693	while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
694		uint32_t fence = bufmgr_fake->fenced.next->fence;
695		_fence_wait_internal(bufmgr_fake, fence);
696
697		if (alloc_block(bo))
698			return 1;
699	}
700
701	if (!DRMLISTEMPTY(&bufmgr_fake->on_hardware)) {
702		while (!DRMLISTEMPTY(&bufmgr_fake->fenced)) {
703			uint32_t fence = bufmgr_fake->fenced.next->fence;
704			_fence_wait_internal(bufmgr_fake, fence);
705		}
706
707		if (!bufmgr_fake->thrashing) {
708			DBG("thrashing\n");
709		}
710		bufmgr_fake->thrashing = 20;
711
712		if (alloc_block(bo))
713			return 1;
714	}
715
716	while (evict_mru(bufmgr_fake))
717		if (alloc_block(bo))
718			return 1;
719
720	DBG("%s 0x%lx bytes failed\n", __FUNCTION__, bo->size);
721
722	return 0;
723}
724
725/***********************************************************************
726 * Public functions
727 */
728
729/**
730 * Wait for hardware idle by emitting a fence and waiting for it.
731 */
732static void
733drm_intel_bufmgr_fake_wait_idle(drm_intel_bufmgr_fake *bufmgr_fake)
734{
735	unsigned int cookie;
736
737	cookie = _fence_emit_internal(bufmgr_fake);
738	_fence_wait_internal(bufmgr_fake, cookie);
739}
740
741/**
742 * Wait for rendering to a buffer to complete.
743 *
 * It is assumed that the batchbuffer which performed the rendering included
745 * the necessary flushing.
746 */
747static void
748drm_intel_fake_bo_wait_rendering_locked(drm_intel_bo *bo)
749{
750	drm_intel_bufmgr_fake *bufmgr_fake =
751	    (drm_intel_bufmgr_fake *) bo->bufmgr;
752	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
753
754	if (bo_fake->block == NULL || !bo_fake->block->fenced)
755		return;
756
757	_fence_wait_internal(bufmgr_fake, bo_fake->block->fence);
758}
759
760static void
761drm_intel_fake_bo_wait_rendering(drm_intel_bo *bo)
762{
763	drm_intel_bufmgr_fake *bufmgr_fake =
764	    (drm_intel_bufmgr_fake *) bo->bufmgr;
765
766	pthread_mutex_lock(&bufmgr_fake->lock);
767	drm_intel_fake_bo_wait_rendering_locked(bo);
768	pthread_mutex_unlock(&bufmgr_fake->lock);
769}
770
771/* Specifically ignore texture memory sharing.
772 *  -- just evict everything
773 *  -- and wait for idle
774 */
775drm_public void
776drm_intel_bufmgr_fake_contended_lock_take(drm_intel_bufmgr *bufmgr)
777{
778	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
779	struct block *block, *tmp;
780
781	pthread_mutex_lock(&bufmgr_fake->lock);
782
783	bufmgr_fake->need_fence = 1;
784	bufmgr_fake->fail = 0;
785
786	/* Wait for hardware idle.  We don't know where acceleration has been
787	 * happening, so we'll need to wait anyway before letting anything get
788	 * put on the card again.
789	 */
790	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
791
792	/* Check that we hadn't released the lock without having fenced the last
793	 * set of buffers.
794	 */
795	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
796	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
797
798	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
799		assert(_fence_test(bufmgr_fake, block->fence));
800		set_dirty(block->bo);
801	}
802
803	pthread_mutex_unlock(&bufmgr_fake->lock);
804}
805
806static drm_intel_bo *
807drm_intel_fake_bo_alloc(drm_intel_bufmgr *bufmgr,
808			const char *name,
809			unsigned long size,
810			unsigned int alignment)
811{
812	drm_intel_bufmgr_fake *bufmgr_fake;
813	drm_intel_bo_fake *bo_fake;
814
815	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
816
817	assert(size != 0);
818
819	bo_fake = calloc(1, sizeof(*bo_fake));
820	if (!bo_fake)
821		return NULL;
822
823	bo_fake->bo.size = size;
824	bo_fake->bo.offset = -1;
825	bo_fake->bo.virtual = NULL;
826	bo_fake->bo.bufmgr = bufmgr;
827	bo_fake->refcount = 1;
828
829	/* Alignment must be a power of two */
830	assert((alignment & (alignment - 1)) == 0);
831	if (alignment == 0)
832		alignment = 1;
833	bo_fake->alignment = alignment;
834	bo_fake->id = ++bufmgr_fake->buf_nr;
835	bo_fake->name = name;
836	bo_fake->flags = 0;
837	bo_fake->is_static = 0;
838
839	DBG("drm_bo_alloc: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
840	    bo_fake->bo.size / 1024);
841
842	return &bo_fake->bo;
843}
844
845static drm_intel_bo *
846drm_intel_fake_bo_alloc_tiled(drm_intel_bufmgr * bufmgr,
847			      const char *name,
848			      int x, int y, int cpp,
849			      uint32_t *tiling_mode,
850			      unsigned long *pitch,
851			      unsigned long flags)
852{
853	unsigned long stride, aligned_y;
854
855	/* No runtime tiling support for fake. */
856	*tiling_mode = I915_TILING_NONE;
857
858	/* Align it for being a render target.  Shouldn't need anything else. */
859	stride = x * cpp;
860	stride = ROUND_UP_TO(stride, 64);
861
862	/* 965 subspan loading alignment */
863	aligned_y = ALIGN(y, 2);
864
865	*pitch = stride;
866
867	return drm_intel_fake_bo_alloc(bufmgr, name, stride * aligned_y,
868				       4096);
869}
870
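/*
 * Worked example for the pitch computation above (illustrative note, not
 * part of the original code): for a 1020x599 RGBA surface (cpp == 4),
 *
 *   stride    = 1020 * 4 = 4080, rounded up to a multiple of 64 -> 4096
 *   aligned_y = ALIGN(599, 2)                                   -> 600
 *
 * so *pitch is 4096 and the buffer is allocated as 4096 * 600 bytes with
 * 4096-byte alignment, always untiled (*tiling_mode is forced to
 * I915_TILING_NONE).
 */
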
871drm_public drm_intel_bo *
872drm_intel_bo_fake_alloc_static(drm_intel_bufmgr *bufmgr,
873			       const char *name,
874			       unsigned long offset,
875			       unsigned long size, void *virtual)
876{
877	drm_intel_bufmgr_fake *bufmgr_fake;
878	drm_intel_bo_fake *bo_fake;
879
880	bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
881
882	assert(size != 0);
883
884	bo_fake = calloc(1, sizeof(*bo_fake));
885	if (!bo_fake)
886		return NULL;
887
888	bo_fake->bo.size = size;
889	bo_fake->bo.offset = offset;
890	bo_fake->bo.virtual = virtual;
891	bo_fake->bo.bufmgr = bufmgr;
892	bo_fake->refcount = 1;
893	bo_fake->id = ++bufmgr_fake->buf_nr;
894	bo_fake->name = name;
895	bo_fake->flags = BM_PINNED;
896	bo_fake->is_static = 1;
897
898	DBG("drm_bo_alloc_static: (buf %d: %s, %lu kb)\n", bo_fake->id,
899	    bo_fake->name, bo_fake->bo.size / 1024);
900
901	return &bo_fake->bo;
902}
903
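/*
 * Usage sketch (illustrative, not part of the original code): static BOs
 * are how fixed ranges such as the front/back/depth buffers carved out by
 * the DDX get exposed to this manager.  The offset, size and mapping
 * below are hypothetical values a driver might pass in.
 *
 *   drm_intel_bo *front_bo =
 *       drm_intel_bo_fake_alloc_static(bufmgr, "front",
 *                                      front_offset, front_size,
 *                                      aperture_map + front_offset);
 *
 * Such a buffer is BM_PINNED: it is never evicted, never gets backing
 * store, and its offset and virtual address never change.
 */
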
904static void
905drm_intel_fake_bo_reference(drm_intel_bo *bo)
906{
907	drm_intel_bufmgr_fake *bufmgr_fake =
908	    (drm_intel_bufmgr_fake *) bo->bufmgr;
909	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
910
911	pthread_mutex_lock(&bufmgr_fake->lock);
912	bo_fake->refcount++;
913	pthread_mutex_unlock(&bufmgr_fake->lock);
914}
915
916static void
917drm_intel_fake_bo_reference_locked(drm_intel_bo *bo)
918{
919	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
920
921	bo_fake->refcount++;
922}
923
924static void
925drm_intel_fake_bo_unreference_locked(drm_intel_bo *bo)
926{
927	drm_intel_bufmgr_fake *bufmgr_fake =
928	    (drm_intel_bufmgr_fake *) bo->bufmgr;
929	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
930	int i;
931
932	if (--bo_fake->refcount == 0) {
933		assert(bo_fake->map_count == 0);
934		/* No remaining references, so free it */
935		if (bo_fake->block)
936			free_block(bufmgr_fake, bo_fake->block, 1);
937		free_backing_store(bo);
938
939		for (i = 0; i < bo_fake->nr_relocs; i++)
940			drm_intel_fake_bo_unreference_locked(bo_fake->relocs[i].
941							     target_buf);
942
943		DBG("drm_bo_unreference: free buf %d %s\n", bo_fake->id,
944		    bo_fake->name);
945
946		free(bo_fake->relocs);
947		free(bo);
948	}
949}
950
951static void
952drm_intel_fake_bo_unreference(drm_intel_bo *bo)
953{
954	drm_intel_bufmgr_fake *bufmgr_fake =
955	    (drm_intel_bufmgr_fake *) bo->bufmgr;
956
957	pthread_mutex_lock(&bufmgr_fake->lock);
958	drm_intel_fake_bo_unreference_locked(bo);
959	pthread_mutex_unlock(&bufmgr_fake->lock);
960}
961
962/**
963 * Set the buffer as not requiring backing store, and instead get the callback
964 * invoked whenever it would be set dirty.
965 */
966drm_public void
967drm_intel_bo_fake_disable_backing_store(drm_intel_bo *bo,
968					void (*invalidate_cb) (drm_intel_bo *bo,
969							       void *ptr),
970					void *ptr)
971{
972	drm_intel_bufmgr_fake *bufmgr_fake =
973	    (drm_intel_bufmgr_fake *) bo->bufmgr;
974	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
975
976	pthread_mutex_lock(&bufmgr_fake->lock);
977
978	if (bo_fake->backing_store)
979		free_backing_store(bo);
980
981	bo_fake->flags |= BM_NO_BACKING_STORE;
982
983	DBG("disable_backing_store set buf %d dirty\n", bo_fake->id);
984	bo_fake->dirty = 1;
985	bo_fake->invalidate_cb = invalidate_cb;
986	bo_fake->invalidate_ptr = ptr;
987
988	/* Note that it is invalid right from the start.  Also note
989	 * invalidate_cb is called with the bufmgr locked, so cannot
990	 * itself make bufmgr calls.
991	 */
992	if (invalidate_cb != NULL)
993		invalidate_cb(bo, ptr);
994
995	pthread_mutex_unlock(&bufmgr_fake->lock);
996}
997
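/*
 * Usage sketch (illustrative, not part of the original code): a driver
 * might use this for scratch buffers whose contents can be regenerated
 * rather than swapped out, e.g. a batchbuffer pool.  my_mark_batch_invalid()
 * and struct my_batch are hypothetical; note the callback runs with the
 * bufmgr lock held, so it must not call back into the bufmgr.
 *
 *   static void my_mark_batch_invalid(drm_intel_bo *bo, void *ptr)
 *   {
 *           struct my_batch *batch = ptr;
 *           batch->contents_valid = 0;
 *   }
 *
 *   drm_intel_bo_fake_disable_backing_store(batch->bo,
 *                                           my_mark_batch_invalid, batch);
 */
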
998/**
999 * Map a buffer into bo->virtual, allocating either card memory space (If
1000 * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary.
1001 */
1002static int
drm_intel_fake_bo_map_locked(drm_intel_bo *bo, int write_enable)
1004{
1005	drm_intel_bufmgr_fake *bufmgr_fake =
1006	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1007	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1008
1009	/* Static buffers are always mapped. */
1010	if (bo_fake->is_static) {
1011		if (bo_fake->card_dirty) {
1012			drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1013			bo_fake->card_dirty = 0;
1014		}
1015		return 0;
1016	}
1017
1018	/* Allow recursive mapping.  Mesa may recursively map buffers with
1019	 * nested display loops, and it is used internally in bufmgr_fake
1020	 * for relocation.
1021	 */
1022	if (bo_fake->map_count++ != 0)
1023		return 0;
1024
1025	{
1026		DBG("drm_bo_map: (buf %d: %s, %lu kb)\n", bo_fake->id,
1027		    bo_fake->name, bo_fake->bo.size / 1024);
1028
1029		if (bo->virtual != NULL) {
1030			drmMsg("%s: already mapped\n", __FUNCTION__);
1031			abort();
1032		} else if (bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)) {
1033
1034			if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1035				DBG("%s: alloc failed\n", __FUNCTION__);
1036				bufmgr_fake->fail = 1;
1037				return 1;
1038			} else {
1039				assert(bo_fake->block);
1040				bo_fake->dirty = 0;
1041
1042				if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) &&
1043				    bo_fake->block->fenced) {
1044					drm_intel_fake_bo_wait_rendering_locked
1045					    (bo);
1046				}
1047
1048				bo->virtual = bo_fake->block->virtual;
1049			}
1050		} else {
1051			if (write_enable)
1052				set_dirty(bo);
1053
1054			if (bo_fake->backing_store == 0)
1055				alloc_backing_store(bo);
1056
1057			if ((bo_fake->card_dirty == 1) && bo_fake->block) {
1058				if (bo_fake->block->fenced)
1059					drm_intel_fake_bo_wait_rendering_locked
1060					    (bo);
1061
1062				memcpy(bo_fake->backing_store,
1063				       bo_fake->block->virtual,
1064				       bo_fake->block->bo->size);
1065				bo_fake->card_dirty = 0;
1066			}
1067
1068			bo->virtual = bo_fake->backing_store;
1069		}
1070	}
1071
1072	return 0;
1073}
1074
1075static int
drm_intel_fake_bo_map(drm_intel_bo *bo, int write_enable)
1077{
1078	drm_intel_bufmgr_fake *bufmgr_fake =
1079	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1080	int ret;
1081
1082	pthread_mutex_lock(&bufmgr_fake->lock);
1083	ret = drm_intel_fake_bo_map_locked(bo, write_enable);
1084	pthread_mutex_unlock(&bufmgr_fake->lock);
1085
1086	return ret;
1087}
1088
1089static int
drm_intel_fake_bo_unmap_locked(drm_intel_bo *bo)
1091{
1092	drm_intel_bufmgr_fake *bufmgr_fake =
1093	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1094	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1095
1096	/* Static buffers are always mapped. */
1097	if (bo_fake->is_static)
1098		return 0;
1099
1100	assert(bo_fake->map_count != 0);
1101	if (--bo_fake->map_count != 0)
1102		return 0;
1103
1104	DBG("drm_bo_unmap: (buf %d: %s, %lu kb)\n", bo_fake->id, bo_fake->name,
1105	    bo_fake->bo.size / 1024);
1106
1107	bo->virtual = NULL;
1108
1109	return 0;
1110}
1111
static int
drm_intel_fake_bo_unmap(drm_intel_bo *bo)
1113{
1114	drm_intel_bufmgr_fake *bufmgr_fake =
1115	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1116	int ret;
1117
1118	pthread_mutex_lock(&bufmgr_fake->lock);
1119	ret = drm_intel_fake_bo_unmap_locked(bo);
1120	pthread_mutex_unlock(&bufmgr_fake->lock);
1121
1122	return ret;
1123}
1124
1125static int
1126drm_intel_fake_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1127			  unsigned long size, const void *data)
1128{
1129	int ret;
1130
1131	if (size == 0 || data == NULL)
1132		return 0;
1133
1134	ret = drm_intel_bo_map(bo, 1);
1135	if (ret)
1136		return ret;
1137	memcpy((unsigned char *)bo->virtual + offset, data, size);
1138	drm_intel_bo_unmap(bo);
1139	return 0;
1140}
1141
1142static void
drm_intel_fake_kick_all_locked(drm_intel_bufmgr_fake *bufmgr_fake)
1144{
1145	struct block *block, *tmp;
1146
1147	bufmgr_fake->performed_rendering = 0;
	/* Okay, for every BO that is on the HW, kick it off.
	   Seriously, not afraid of the POLICE right now. */
1150	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->on_hardware) {
1151		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1152
1153		block->on_hardware = 0;
1154		free_block(bufmgr_fake, block, 0);
1155		bo_fake->block = NULL;
1156		bo_fake->validated = 0;
1157		if (!(bo_fake->flags & BM_NO_BACKING_STORE))
1158			bo_fake->dirty = 1;
1159	}
1160
1161}
1162
1163static int
drm_intel_fake_bo_validate(drm_intel_bo *bo)
1165{
1166	drm_intel_bufmgr_fake *bufmgr_fake;
1167	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1168
1169	bufmgr_fake = (drm_intel_bufmgr_fake *) bo->bufmgr;
1170
1171	DBG("drm_bo_validate: (buf %d: %s, %lu kb)\n", bo_fake->id,
1172	    bo_fake->name, bo_fake->bo.size / 1024);
1173
1174	/* Sanity check: Buffers should be unmapped before being validated.
1175	 * This is not so much of a problem for bufmgr_fake, but TTM refuses,
1176	 * and the problem is harder to debug there.
1177	 */
1178	assert(bo_fake->map_count == 0);
1179
1180	if (bo_fake->is_static) {
1181		/* Add it to the needs-fence list */
1182		bufmgr_fake->need_fence = 1;
1183		return 0;
1184	}
1185
1186	/* Allocate the card memory */
1187	if (!bo_fake->block && !evict_and_alloc_block(bo)) {
1188		bufmgr_fake->fail = 1;
1189		DBG("Failed to validate buf %d:%s\n", bo_fake->id,
1190		    bo_fake->name);
1191		return -1;
1192	}
1193
1194	assert(bo_fake->block);
1195	assert(bo_fake->block->bo == &bo_fake->bo);
1196
1197	bo->offset = bo_fake->block->mem->ofs;
1198
1199	/* Upload the buffer contents if necessary */
1200	if (bo_fake->dirty) {
1201		DBG("Upload dirty buf %d:%s, sz %lu offset 0x%x\n", bo_fake->id,
1202		    bo_fake->name, bo->size, bo_fake->block->mem->ofs);
1203
1204		assert(!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED)));
1205
		/* Actually, we should be able to just wait for a fence on the
		 * memory, which we would be tracking when we free it.  Waiting
1208		 * for idle is a sufficiently large hammer for now.
1209		 */
1210		drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1211
		/* We may never have mapped this BO, so it might not have any
		 * backing store; if this happens it should be rare, but zero
		 * the card memory in any case. */
1215		if (bo_fake->backing_store)
1216			memcpy(bo_fake->block->virtual, bo_fake->backing_store,
1217			       bo->size);
1218		else
1219			memset(bo_fake->block->virtual, 0, bo->size);
1220
1221		bo_fake->dirty = 0;
1222	}
1223
1224	bo_fake->block->fenced = 0;
1225	bo_fake->block->on_hardware = 1;
1226	DRMLISTDEL(bo_fake->block);
1227	DRMLISTADDTAIL(bo_fake->block, &bufmgr_fake->on_hardware);
1228
1229	bo_fake->validated = 1;
1230	bufmgr_fake->need_fence = 1;
1231
1232	return 0;
1233}
1234
1235static void
1236drm_intel_fake_fence_validated(drm_intel_bufmgr *bufmgr)
1237{
1238	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1239	unsigned int cookie;
1240
1241	cookie = _fence_emit_internal(bufmgr_fake);
1242	fence_blocks(bufmgr_fake, cookie);
1243
1244	DBG("drm_fence_validated: 0x%08x cookie\n", cookie);
1245}
1246
1247static void
1248drm_intel_fake_destroy(drm_intel_bufmgr *bufmgr)
1249{
1250	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1251
1252	pthread_mutex_destroy(&bufmgr_fake->lock);
1253	mmDestroy(bufmgr_fake->heap);
1254	free(bufmgr);
1255}
1256
1257static int
1258drm_intel_fake_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1259			  drm_intel_bo *target_bo, uint32_t target_offset,
1260			  uint32_t read_domains, uint32_t write_domain)
1261{
1262	drm_intel_bufmgr_fake *bufmgr_fake =
1263	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1264	struct fake_buffer_reloc *r;
1265	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1266	drm_intel_bo_fake *target_fake = (drm_intel_bo_fake *) target_bo;
1267	int i;
1268
1269	pthread_mutex_lock(&bufmgr_fake->lock);
1270
1271	assert(bo);
1272	assert(target_bo);
1273
	if (bo_fake->relocs == NULL) {
		bo_fake->relocs =
		    malloc(sizeof(struct fake_buffer_reloc) * MAX_RELOCS);
		if (bo_fake->relocs == NULL) {
			pthread_mutex_unlock(&bufmgr_fake->lock);
			return -ENOMEM;
		}
	}
1278
1279	r = &bo_fake->relocs[bo_fake->nr_relocs++];
1280
1281	assert(bo_fake->nr_relocs <= MAX_RELOCS);
1282
1283	drm_intel_fake_bo_reference_locked(target_bo);
1284
1285	if (!target_fake->is_static) {
1286		bo_fake->child_size +=
1287		    ALIGN(target_bo->size, target_fake->alignment);
1288		bo_fake->child_size += target_fake->child_size;
1289	}
1290	r->target_buf = target_bo;
1291	r->offset = offset;
1292	r->last_target_offset = target_bo->offset;
1293	r->delta = target_offset;
1294	r->read_domains = read_domains;
1295	r->write_domain = write_domain;
1296
1297	if (bufmgr_fake->debug) {
1298		/* Check that a conflicting relocation hasn't already been
1299		 * emitted.
1300		 */
1301		for (i = 0; i < bo_fake->nr_relocs - 1; i++) {
1302			struct fake_buffer_reloc *r2 = &bo_fake->relocs[i];
1303
1304			assert(r->offset != r2->offset);
1305		}
1306	}
1307
1308	pthread_mutex_unlock(&bufmgr_fake->lock);
1309
1310	return 0;
1311}
1312
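/*
 * Usage sketch (illustrative, not part of the original code): a driver
 * emitting a command that references another BO writes a presumed offset
 * into the batch and records a relocation at that location through the
 * generic drm_intel_bo_emit_reloc() wrapper, which lands here for the
 * fake bufmgr.  batch_bo, target_bo and reloc_offset are hypothetical.
 *
 *   uint32_t reloc_offset = ...;   // byte offset of the dword in batch_bo
 *
 *   drm_intel_bo_emit_reloc(batch_bo, reloc_offset,
 *                           target_bo, 0,
 *                           I915_GEM_DOMAIN_RENDER,
 *                           I915_GEM_DOMAIN_RENDER);
 *
 * Before execution, drm_intel_fake_reloc_and_validate_buffer() rewrites
 * the dword at reloc_offset with target_bo->offset + delta.
 */
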
1313/**
1314 * Incorporates the validation flags associated with each relocation into
1315 * the combined validation flags for the buffer on this batchbuffer submission.
1316 */
1317static void
1318drm_intel_fake_calculate_domains(drm_intel_bo *bo)
1319{
1320	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1321	int i;
1322
1323	for (i = 0; i < bo_fake->nr_relocs; i++) {
1324		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1325		drm_intel_bo_fake *target_fake =
1326		    (drm_intel_bo_fake *) r->target_buf;
1327
1328		/* Do the same for the tree of buffers we depend on */
1329		drm_intel_fake_calculate_domains(r->target_buf);
1330
1331		target_fake->read_domains |= r->read_domains;
1332		target_fake->write_domain |= r->write_domain;
1333	}
1334}
1335
1336static int
1337drm_intel_fake_reloc_and_validate_buffer(drm_intel_bo *bo)
1338{
1339	drm_intel_bufmgr_fake *bufmgr_fake =
1340	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1341	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1342	int i, ret;
1343
1344	assert(bo_fake->map_count == 0);
1345
1346	for (i = 0; i < bo_fake->nr_relocs; i++) {
1347		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1348		drm_intel_bo_fake *target_fake =
1349		    (drm_intel_bo_fake *) r->target_buf;
1350		uint32_t reloc_data;
1351
1352		/* Validate the target buffer if that hasn't been done. */
1353		if (!target_fake->validated) {
1354			ret =
1355			    drm_intel_fake_reloc_and_validate_buffer(r->target_buf);
1356			if (ret != 0) {
1357				if (bo->virtual != NULL)
1358					drm_intel_fake_bo_unmap_locked(bo);
1359				return ret;
1360			}
1361		}
1362
1363		/* Calculate the value of the relocation entry. */
1364		if (r->target_buf->offset != r->last_target_offset) {
1365			reloc_data = r->target_buf->offset + r->delta;
1366
1367			if (bo->virtual == NULL)
1368				drm_intel_fake_bo_map_locked(bo, 1);
1369
1370			*(uint32_t *) ((uint8_t *) bo->virtual + r->offset) =
1371			    reloc_data;
1372
1373			r->last_target_offset = r->target_buf->offset;
1374		}
1375	}
1376
1377	if (bo->virtual != NULL)
1378		drm_intel_fake_bo_unmap_locked(bo);
1379
1380	if (bo_fake->write_domain != 0) {
1381		if (!(bo_fake->flags & (BM_NO_BACKING_STORE | BM_PINNED))) {
1382			if (bo_fake->backing_store == 0)
1383				alloc_backing_store(bo);
1384		}
1385		bo_fake->card_dirty = 1;
1386		bufmgr_fake->performed_rendering = 1;
1387	}
1388
1389	return drm_intel_fake_bo_validate(bo);
1390}
1391
1392static void
1393drm_intel_bo_fake_post_submit(drm_intel_bo *bo)
1394{
1395	drm_intel_bufmgr_fake *bufmgr_fake =
1396	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1397	drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo;
1398	int i;
1399
1400	for (i = 0; i < bo_fake->nr_relocs; i++) {
1401		struct fake_buffer_reloc *r = &bo_fake->relocs[i];
1402		drm_intel_bo_fake *target_fake =
1403		    (drm_intel_bo_fake *) r->target_buf;
1404
1405		if (target_fake->validated)
1406			drm_intel_bo_fake_post_submit(r->target_buf);
1407
1408		DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n",
1409		    bo_fake->name, (uint32_t) bo->offset, r->offset,
1410		    target_fake->name, (uint32_t) r->target_buf->offset,
1411		    r->delta);
1412	}
1413
1414	assert(bo_fake->map_count == 0);
1415	bo_fake->validated = 0;
1416	bo_fake->read_domains = 0;
1417	bo_fake->write_domain = 0;
1418}
1419
1420drm_public void
1421drm_intel_bufmgr_fake_set_exec_callback(drm_intel_bufmgr *bufmgr,
1422					     int (*exec) (drm_intel_bo *bo,
1423							  unsigned int used,
1424							  void *priv),
1425					     void *priv)
1426{
1427	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1428
1429	bufmgr_fake->exec = exec;
1430	bufmgr_fake->exec_priv = priv;
1431}
1432
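/*
 * Usage sketch (illustrative, not part of the original code): a driver
 * that submits batches through its own path (e.g. a private ioctl) can
 * hook execution instead of letting bufmgr_fake issue the
 * DRM_I915_BATCHBUFFER command.  my_submit_batch() and struct my_device
 * are hypothetical.
 *
 *   static int my_exec(drm_intel_bo *bo, unsigned int used, void *priv)
 *   {
 *           struct my_device *dev = priv;
 *           return my_submit_batch(dev, bo->offset, used);
 *   }
 *
 *   drm_intel_bufmgr_fake_set_exec_callback(bufmgr, my_exec, dev);
 */
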
1433static int
1434drm_intel_fake_bo_exec(drm_intel_bo *bo, int used,
1435		       drm_clip_rect_t * cliprects, int num_cliprects, int DR4)
1436{
1437	drm_intel_bufmgr_fake *bufmgr_fake =
1438	    (drm_intel_bufmgr_fake *) bo->bufmgr;
1439	drm_intel_bo_fake *batch_fake = (drm_intel_bo_fake *) bo;
1440	struct drm_i915_batchbuffer batch;
1441	int ret;
1442	int retry_count = 0;
1443
1444	pthread_mutex_lock(&bufmgr_fake->lock);
1445
1446	bufmgr_fake->performed_rendering = 0;
1447
1448	drm_intel_fake_calculate_domains(bo);
1449
1450	batch_fake->read_domains = I915_GEM_DOMAIN_COMMAND;
1451
	/* If we've run out of RAM, blow the whole lot away and retry. */
1453restart:
1454	ret = drm_intel_fake_reloc_and_validate_buffer(bo);
1455	if (bufmgr_fake->fail == 1) {
1456		if (retry_count == 0) {
1457			retry_count++;
1458			drm_intel_fake_kick_all_locked(bufmgr_fake);
1459			bufmgr_fake->fail = 0;
1460			goto restart;
1461		} else		/* dump out the memory here */
1462			mmDumpMemInfo(bufmgr_fake->heap);
1463	}
1464
1465	assert(ret == 0);
1466
1467	if (bufmgr_fake->exec != NULL) {
1468		int ret = bufmgr_fake->exec(bo, used, bufmgr_fake->exec_priv);
1469		if (ret != 0) {
1470			pthread_mutex_unlock(&bufmgr_fake->lock);
1471			return ret;
1472		}
1473	} else {
1474		batch.start = bo->offset;
1475		batch.used = used;
1476		batch.cliprects = cliprects;
1477		batch.num_cliprects = num_cliprects;
1478		batch.DR1 = 0;
1479		batch.DR4 = DR4;
1480
1481		if (drmCommandWrite
1482		    (bufmgr_fake->fd, DRM_I915_BATCHBUFFER, &batch,
1483		     sizeof(batch))) {
1484			drmMsg("DRM_I915_BATCHBUFFER: %d\n", -errno);
1485			pthread_mutex_unlock(&bufmgr_fake->lock);
1486			return -errno;
1487		}
1488	}
1489
1490	drm_intel_fake_fence_validated(bo->bufmgr);
1491
1492	drm_intel_bo_fake_post_submit(bo);
1493
1494	pthread_mutex_unlock(&bufmgr_fake->lock);
1495
1496	return 0;
1497}
1498
1499/**
1500 * Return an error if the list of BOs will exceed the aperture size.
1501 *
1502 * This is a rough guess and likely to fail, as during the validate sequence we
1503 * may place a buffer in an inopportune spot early on and then fail to fit
1504 * a set smaller than the aperture.
1505 */
1506static int
1507drm_intel_fake_check_aperture_space(drm_intel_bo ** bo_array, int count)
1508{
1509	drm_intel_bufmgr_fake *bufmgr_fake =
1510	    (drm_intel_bufmgr_fake *) bo_array[0]->bufmgr;
1511	unsigned int sz = 0;
1512	int i;
1513
1514	for (i = 0; i < count; i++) {
1515		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) bo_array[i];
1516
1517		if (bo_fake == NULL)
1518			continue;
1519
1520		if (!bo_fake->is_static)
1521			sz += ALIGN(bo_array[i]->size, bo_fake->alignment);
1522		sz += bo_fake->child_size;
1523	}
1524
1525	if (sz > bufmgr_fake->size) {
1526		DBG("check_space: overflowed bufmgr size, %ukb vs %lukb\n",
1527		    sz / 1024, bufmgr_fake->size / 1024);
1528		return -1;
1529	}
1530
	DBG("drm_check_space: sz %ukb vs bufmgr %lukb\n", sz / 1024,
1532	    bufmgr_fake->size / 1024);
1533	return 0;
1534}
1535
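/*
 * Usage sketch (illustrative, not part of the original code): callers
 * typically run this estimate through the generic wrapper before adding
 * another buffer to a batch, and flush first if the working set would no
 * longer fit.  batch_bo, new_bo and intel_batchbuffer_flush() are
 * hypothetical driver-side names.
 *
 *   drm_intel_bo *check[] = { batch_bo, new_bo };
 *
 *   if (drm_intel_bufmgr_check_aperture_space(check, 2) != 0)
 *           intel_batchbuffer_flush(intel);
 */
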
1536/**
1537 * Evicts all buffers, waiting for fences to pass and copying contents out
1538 * as necessary.
1539 *
1540 * Used by the X Server on LeaveVT, when the card memory is no longer our
1541 * own.
1542 */
1543drm_public void
1544drm_intel_bufmgr_fake_evict_all(drm_intel_bufmgr *bufmgr)
1545{
1546	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1547	struct block *block, *tmp;
1548
1549	pthread_mutex_lock(&bufmgr_fake->lock);
1550
1551	bufmgr_fake->need_fence = 1;
1552	bufmgr_fake->fail = 0;
1553
1554	/* Wait for hardware idle.  We don't know where acceleration has been
1555	 * happening, so we'll need to wait anyway before letting anything get
1556	 * put on the card again.
1557	 */
1558	drm_intel_bufmgr_fake_wait_idle(bufmgr_fake);
1559
1560	/* Check that we hadn't released the lock without having fenced the last
1561	 * set of buffers.
1562	 */
1563	assert(DRMLISTEMPTY(&bufmgr_fake->fenced));
1564	assert(DRMLISTEMPTY(&bufmgr_fake->on_hardware));
1565
1566	DRMLISTFOREACHSAFE(block, tmp, &bufmgr_fake->lru) {
1567		drm_intel_bo_fake *bo_fake = (drm_intel_bo_fake *) block->bo;
1568		/* Releases the memory, and memcpys dirty contents out if
1569		 * necessary.
1570		 */
1571		free_block(bufmgr_fake, block, 0);
1572		bo_fake->block = NULL;
1573	}
1574
1575	pthread_mutex_unlock(&bufmgr_fake->lock);
1576}
1577
1578drm_public void
1579drm_intel_bufmgr_fake_set_last_dispatch(drm_intel_bufmgr *bufmgr,
1580					volatile unsigned int
1581					*last_dispatch)
1582{
1583	drm_intel_bufmgr_fake *bufmgr_fake = (drm_intel_bufmgr_fake *) bufmgr;
1584
1585	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1586}
1587
1588drm_public drm_intel_bufmgr *
1589drm_intel_bufmgr_fake_init(int fd, unsigned long low_offset,
1590			   void *low_virtual, unsigned long size,
1591			   volatile unsigned int *last_dispatch)
1592{
1593	drm_intel_bufmgr_fake *bufmgr_fake;
1594
	bufmgr_fake = calloc(1, sizeof(*bufmgr_fake));
	if (bufmgr_fake == NULL)
		return NULL;

1597	if (pthread_mutex_init(&bufmgr_fake->lock, NULL) != 0) {
1598		free(bufmgr_fake);
1599		return NULL;
1600	}
1601
1602	/* Initialize allocator */
1603	DRMINITLISTHEAD(&bufmgr_fake->fenced);
1604	DRMINITLISTHEAD(&bufmgr_fake->on_hardware);
1605	DRMINITLISTHEAD(&bufmgr_fake->lru);
1606
1607	bufmgr_fake->low_offset = low_offset;
1608	bufmgr_fake->virtual = low_virtual;
1609	bufmgr_fake->size = size;
1610	bufmgr_fake->heap = mmInit(low_offset, size);
1611
1612	/* Hook in methods */
1613	bufmgr_fake->bufmgr.bo_alloc = drm_intel_fake_bo_alloc;
1614	bufmgr_fake->bufmgr.bo_alloc_for_render = drm_intel_fake_bo_alloc;
1615	bufmgr_fake->bufmgr.bo_alloc_tiled = drm_intel_fake_bo_alloc_tiled;
1616	bufmgr_fake->bufmgr.bo_reference = drm_intel_fake_bo_reference;
1617	bufmgr_fake->bufmgr.bo_unreference = drm_intel_fake_bo_unreference;
1618	bufmgr_fake->bufmgr.bo_map = drm_intel_fake_bo_map;
1619	bufmgr_fake->bufmgr.bo_unmap = drm_intel_fake_bo_unmap;
1620	bufmgr_fake->bufmgr.bo_subdata = drm_intel_fake_bo_subdata;
1621	bufmgr_fake->bufmgr.bo_wait_rendering =
1622	    drm_intel_fake_bo_wait_rendering;
1623	bufmgr_fake->bufmgr.bo_emit_reloc = drm_intel_fake_emit_reloc;
1624	bufmgr_fake->bufmgr.destroy = drm_intel_fake_destroy;
1625	bufmgr_fake->bufmgr.bo_exec = drm_intel_fake_bo_exec;
1626	bufmgr_fake->bufmgr.check_aperture_space =
1627	    drm_intel_fake_check_aperture_space;
1628	bufmgr_fake->bufmgr.debug = 0;
1629
1630	bufmgr_fake->fd = fd;
1631	bufmgr_fake->last_dispatch = (volatile int *)last_dispatch;
1632
1633	return &bufmgr_fake->bufmgr;
1634}
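
/*
 * Usage sketch (illustrative, not part of the original code): the typical
 * life cycle as seen from a driver, using the generic drm_intel_bo_*
 * wrappers from intel_bufmgr.h.  fd, aper_offset, aper_virtual,
 * aper_size, last_dispatch, verts, batch_bo and used_bytes are
 * hypothetical.
 *
 *   drm_intel_bufmgr *bufmgr =
 *       drm_intel_bufmgr_fake_init(fd, aper_offset, aper_virtual,
 *                                  aper_size, last_dispatch);
 *
 *   drm_intel_bo *bo = drm_intel_bo_alloc(bufmgr, "vertices", 4096, 4096);
 *   drm_intel_bo_subdata(bo, 0, sizeof(verts), verts);
 *
 *   ... build a batchbuffer referencing bo via drm_intel_bo_emit_reloc(),
 *   then submit it ...
 *
 *   drm_intel_bo_exec(batch_bo, used_bytes, NULL, 0, 0);
 *
 *   drm_intel_bo_unreference(bo);
 *   drm_intel_bufmgr_destroy(bufmgr);
 */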
1635