/* radeon_dma.c, revision 06d3732a9094030fc33120f16f162e0d405f132c */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

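/* COPY_DWORDS(dst, src, nr): copy nr 32-bit words from src to dst and
 * advance dst past the copied data.  The x86 path performs the copy with
 * a single "rep movsl", updating the destination register in place; the
 * portable fallback is a plain dword loop.
 */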
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif

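/* radeonEmitVec4/8/12/16: gather count elements of 4/8/12/16 bytes each
 * from the (possibly strided) array at data into the tightly packed
 * dword buffer at out.  When the source is already packed, the whole
 * array is copied in one COPY_DWORDS.
 */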
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

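/* Upload a vertex attribute array into a freshly allocated DMA region
 * and describe it in *aos.  size is in dwords per element.  A stride of
 * zero uploads exactly one element and sets aos->stride to zero.
 */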
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	radeon_bo_map(aos->bo, 1);
	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
	radeon_bo_unmap(aos->bo);
}

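/* Reset the three DMA buffer-object lists: "free" holds idle buffers
 * ready for reuse, "wait" holds buffers the GPU may still be using, and
 * "reserved" holds the buffer(s) currently being filled.
 */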
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

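/* Make a DMA buffer of at least size bytes current: reuse the newest
 * buffer on the free list when it is big enough, otherwise open a new
 * BO in GTT, flushing the command buffer and retrying if the allocation
 * fails.  The chosen buffer goes to the head of the reserved list.
 */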
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* Raise the minimum size to at least the requested size,
	   aligned up to the next multiple of 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* Buffers are pushed and popped at the end of the list, so a
		   counter of unused buffers can be kept and those buffers can
		   later be freed from the beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed in radeon_revalidate_bos. */
		goto again_alloc;
	}
}

/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

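/* Release every buffer on the free, wait and reserved lists, dropping
 * the driver's references to the underlying buffer objects.
 */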
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

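/* Give back the trailing return_bytes of the current DMA region so the
 * space can be handed out again by the next allocation.
 */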
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

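/* Return nonzero when the GPU is done with the buffer object.  If the
 * kernel or libdrm lacks the busy query (-EINVAL), warn once and treat
 * the buffer as idle.
 */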
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

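/* Age the DMA buffer lists: reserved buffers move to the wait list,
 * idle wait-list buffers move to the free list, and free-list buffers
 * that have gone unused for DMA_BO_FREE_TIME calls are destroyed.
 * Buffers smaller than the current minimum size are freed immediately.
 */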
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %d\n",
		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* Request updated cs processing information from the kernel. */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}
	/* Move idle buffers from the wait list to the free list; the wait
	   list gives the GPU time to finish with the data before a buffer
	   is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are now too small to satisfy the minimum
		   size of recent requests. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* Move reserved buffers to the wait list. */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that are now too small to satisfy the minimum
		   size of recent requests. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* Free buffers that have been unused for some time. */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}

/* Allocate space for nverts vertices of vsize bytes each in the current
 * dma region.  Flushes, refills the region, and returns NULL when the
 * request does not fit in the reserved buffer.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* Re-register the flush callback if a cmdbuf flush cleared it. */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
460
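/* Flush any pending vertex data and drop the references held on the tcl
 * aos buffer objects.  (newinputs is unused.)
 */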
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
   int i;
   if (RADEON_DEBUG & RADEON_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (radeon->dma.flush) {
      radeon->dma.flush(radeon->glCtx);
   }
   for (i = 0; i < radeon->tcl.aos_count; i++) {
      if (radeon->tcl.aos[i].bo) {
         radeon_bo_unref(radeon->tcl.aos[i].bo);
         radeon->tcl.aos[i].bo = NULL;
      }
   }
}