radeon_dma.c revision 13b5a624b1899c457279907d58046dfb3c95addc
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

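/* COPY_DWORDS copies 'nr' 32-bit words from 'src' to 'dst' and leaves
 * 'dst' pointing past the copied data.  On x86 this is a "rep ; movsl"
 * string copy with count/destination/source pinned to ecx/edi/esi; the
 * generic fallback is a plain dword loop.
 */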
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif

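/* The radeonEmitVec{4,8,12,16} helpers gather vertex elements of 4, 8,
 * 12 or 16 bytes from a strided 'data' array into the tightly packed
 * buffer at 'out'.  When the input stride equals the element size the
 * data is already contiguous and a single bulk copy suffices.
 */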
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

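/* Upload 'count' vertex elements of 'size' dwords each from 'data' into a
 * freshly allocated DMA region, recording the layout in 'aos'.  A stride
 * of zero means a single constant element is uploaded (aos->stride == 0),
 * so the same value is fetched for every vertex.
 *
 * A sketch of typical usage (variable names here are illustrative, not
 * from this file):
 *
 *   struct radeon_aos aos;
 *   GLfloat color[4] = { 1.0f, 0.0f, 0.0f, 1.0f };
 *   // one vec4 with stride 0: a constant color for all vertices
 *   rcommon_emit_vector(ctx, &aos, color, 4, 0, num_verts);
 */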
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

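/* Initialize the DMA buffer lists.  Buffers cycle through three lists:
 * 'reserved' (currently being filled), 'wait' (handed to the GPU) and
 * 'free' (idle and reusable).
 */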
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

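/* Replace the current reserved bo with one that holds at least 'size'
 * bytes: reuse a buffer from the free list when one is big enough,
 * otherwise open a new bo of at least dma.minimum_size, flushing the
 * command buffer and retrying if the allocation fails.
 */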
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* Raise the minimum size to at least the requested size,
	   aligned up to the next 16-byte boundary. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);

	/* unmap the old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* Buffers are pushed and popped at the end of the list, so
		   the expire counter on unused buffers stays valid and they
		   can later be freed from the beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed in radeon_revalidate_bos and
		   the reserved list emptied, so allocate a fresh buffer. */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate 'bytes' from the current reserved DMA buffer, aligned to
 * 'alignment' bytes (which must be a power of two).  If there isn't
 * enough space left, grab a new buffer and discard what was left of the
 * old one.
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align the next allocation to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

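/* Unconditionally release every buffer on all three lists, e.g. at
 * context teardown.
 */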
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

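/* Give back the tail 'return_bytes' of the current reserved region,
 * making them available to the next allocation.
 */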
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

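/* Query whether the GPU is done with 'bo'.  A kernel without busy-query
 * support (-EINVAL) is treated as idle, after warning once.
 */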
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

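/* Retire the current DMA regions: idle buffers on the wait list move to
 * the free list, reserved buffers move to the wait list, and free buffers
 * that have not been reused within DMA_BO_FREE_TIME release cycles are
 * destroyed.  Buffers smaller than dma.minimum_size are dropped outright.
 */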
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %d\n",
			__FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}
	/* Move idle bos from the wait list to the free list; the wait list
	   gives the GPU time to finish with the data before reuse. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects too small to satisfy the current (large)
		   minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
	/* move reserved bos to the wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects too small to satisfy the current (large)
		   minimum request size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush the vertices accumulated in the current DMA region through the
 * per-chip swtcl_flush hook, then reset the software TCL vertex count.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}

/* Alloc space in the current dma region for 'nverts' vertices of 'vsize'
 * bytes each.  Returns NULL when the current region was exhausted and had
 * to be refilled.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* The cmdbuf flush cleared dma.flush; re-arm it so pending
		   swtcl vertices get emitted before the next flush. */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

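/* Unref the array-of-structures buffers referenced by the TCL state,
 * flushing any pending DMA vertices first.  'newinputs' is unused here.
 */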
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}