radeon_dma.c revision 9a7776696b786180f1d384eb22b928707e74dfca
1/**************************************************************************
2
3Copyright (C) 2004 Nicolai Haehnle.
4Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
5
6The Weather Channel (TM) funded Tungsten Graphics to develop the
7initial release of the Radeon 8500 driver under the XFree86 license.
8This notice must be preserved.
9
10All Rights Reserved.
11
12Permission is hereby granted, free of charge, to any person obtaining a
13copy of this software and associated documentation files (the "Software"),
14to deal in the Software without restriction, including without limitation
15on the rights to use, copy, modify, merge, publish, distribute, sub
16license, and/or sell copies of the Software, and to permit persons to whom
17the Software is furnished to do so, subject to the following conditions:
18
19The above copyright notice and this permission notice (including the next
20paragraph) shall be included in all copies or substantial portions of the
21Software.
22
23THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29USE OR OTHER DEALINGS IN THE SOFTWARE.
30
31**************************************************************************/
32
33#include <errno.h>
34#include "radeon_common.h"
35#include "main/simple_list.h"
36
#if defined(USE_X86_ASM)
/* Copy 'nr' 32-bit words from 'src' to 'dst' using a single "rep movsl".
 * Side effect: 'dst' is left pointing one past the last dword written
 * (ESI/EDI advance during the string op and EDI is written back via "=D").
 *
 * NOTE(review): the constraint list looks fragile — two outputs are bound
 * to the same __tmp lvalue, and the "D"/"S" inputs duplicate the "=D"/"=S"
 * outputs instead of using matching-digit constraints.  This has shipped
 * for years on GCC, but confirm against the GCC extended-asm docs before
 * modifying.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
/* Portable fallback: dword-by-dword copy.  Like the asm version, 'dst' is
 * advanced past the copied region; 'src' is left unchanged.
 */
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif
56
57void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
58{
59	int i;
60
61	if (RADEON_DEBUG & RADEON_VERTS)
62		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
63			__FUNCTION__, count, stride, (void *)out, (void *)data);
64
65	if (stride == 4)
66		COPY_DWORDS(out, data, count);
67	else
68		for (i = 0; i < count; i++) {
69			out[0] = *(int *)data;
70			out++;
71			data += stride;
72		}
73}
74
75void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
76{
77	int i;
78
79	if (RADEON_DEBUG & RADEON_VERTS)
80		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
81			__FUNCTION__, count, stride, (void *)out, (void *)data);
82
83	if (stride == 8)
84		COPY_DWORDS(out, data, count * 2);
85	else
86		for (i = 0; i < count; i++) {
87			out[0] = *(int *)data;
88			out[1] = *(int *)(data + 4);
89			out += 2;
90			data += stride;
91		}
92}
93
94void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
95{
96	int i;
97
98	if (RADEON_DEBUG & RADEON_VERTS)
99		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
100			__FUNCTION__, count, stride, (void *)out, (void *)data);
101
102	if (stride == 12) {
103		COPY_DWORDS(out, data, count * 3);
104    }
105	else
106		for (i = 0; i < count; i++) {
107			out[0] = *(int *)data;
108			out[1] = *(int *)(data + 4);
109			out[2] = *(int *)(data + 8);
110			out += 3;
111			data += stride;
112		}
113}
114
115void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
116{
117	int i;
118
119	if (RADEON_DEBUG & RADEON_VERTS)
120		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
121			__FUNCTION__, count, stride, (void *)out, (void *)data);
122
123	if (stride == 16)
124		COPY_DWORDS(out, data, count * 4);
125	else
126		for (i = 0; i < count; i++) {
127			out[0] = *(int *)data;
128			out[1] = *(int *)(data + 4);
129			out[2] = *(int *)(data + 8);
130			out[3] = *(int *)(data + 12);
131			out += 4;
132			data += stride;
133		}
134}
135
136void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
137			 const GLvoid * data, int size, int stride, int count)
138{
139	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
140	uint32_t *out;
141
142	if (stride == 0) {
143		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
144		count = 1;
145		aos->stride = 0;
146	} else {
147		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
148		aos->stride = size;
149	}
150
151	aos->components = size;
152	aos->count = count;
153
154	radeon_bo_map(aos->bo, 1);
155	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
156	switch (size) {
157	case 1: radeonEmitVec4(out, data, stride, count); break;
158	case 2: radeonEmitVec8(out, data, stride, count); break;
159	case 3: radeonEmitVec12(out, data, stride, count); break;
160	case 4: radeonEmitVec16(out, data, stride, count); break;
161	default:
162		assert(0);
163		break;
164	}
165	radeon_bo_unmap(aos->bo);
166}
167
168void radeon_init_dma(radeonContextPtr rmesa)
169{
170	make_empty_list(&rmesa->dma.free);
171	make_empty_list(&rmesa->dma.wait);
172	make_empty_list(&rmesa->dma.reserved);
173	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
174}
175
/* Make a new DMA buffer current: unmap the previous one, obtain a buffer of
 * at least 'size' bytes (reusing the free list when possible, otherwise
 * allocating a fresh GTT BO), push it onto the head of the reserved list,
 * and map it.  Resets current_used/current_vertexptr to 0.
 */
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* we set minimum sizes to at least requested size
	   aligned to next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);

	/* The current buffer (head of reserved) is mapped while in use;
	   unmap it before swapping in a replacement. */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	/* Reuse the largest free-list entry (kept at the tail) if it is big
	   enough; otherwise allocate a brand-new BO. */
	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		/* Allocation failure usually means memory is tied up in the
		   pending command stream; flush and retry until it succeeds. */
		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers from end of list so we can keep
		   counter on unused buffers for later freeing them from
		   begin of list */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	/* Revalidate the new BO against the command stream; this may flush,
	   which in turn may empty the reserved list again. */
	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
        /* Cmd buff have been flushed in radeon_revalidate_bos */
		/* NOTE(review): this goto jumps back into the allocation path
		   reusing 'dma_bo' from either branch above; if the flush moved
		   that node to another list, its old bo pointer is overwritten
		   without an unref — looks like a potential leak.  Confirm
		   against radeonReleaseDmaRegions before changing. */
		goto again_alloc;
	}
	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}
228
/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 *
 * On return *pbo holds a NEW reference to the backing BO (caller must
 * radeon_bo_unref it) and *poffset is the byte offset of the region within
 * that BO.  'alignment' must be a power of two.
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	/* Flush any buffered swtcl vertices first so they land ahead of
	   whatever is about to use this region. */
	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	/* Round the current offset up to 'alignment' (power-of-two trick). */
	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	/* Not enough room left in the current buffer? Get a fresh one. */
	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
261
262void radeonFreeDmaRegions(radeonContextPtr rmesa)
263{
264	struct radeon_dma_bo *dma_bo;
265	struct radeon_dma_bo *temp;
266	if (RADEON_DEBUG & RADEON_DMA)
267		fprintf(stderr, "%s\n", __FUNCTION__);
268
269	foreach_s(dma_bo, temp, &rmesa->dma.free) {
270		remove_from_list(dma_bo);
271	        radeon_bo_unref(dma_bo->bo);
272		FREE(dma_bo);
273	}
274
275	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
276		remove_from_list(dma_bo);
277	        radeon_bo_unref(dma_bo->bo);
278		FREE(dma_bo);
279	}
280
281	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
282		remove_from_list(dma_bo);
283	        radeon_bo_unref(dma_bo->bo);
284		FREE(dma_bo);
285	}
286}
287
288void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
289{
290	if (is_empty_list(&rmesa->dma.reserved))
291		return;
292
293	if (RADEON_DEBUG & RADEON_IOCTL)
294		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
295	rmesa->dma.current_used -= return_bytes;
296	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
297}
298
/* Return nonzero when the GPU is no longer using 'bo'.  A kernel/libdrm
 * without busy-query support (-EINVAL) is treated as idle, with a one-time
 * warning.
 */
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	const int busy = radeon_bo_is_busy(bo, &domain);

	if (busy == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause small performance drop for you.\n");
	}

	/* Only an explicit -EBUSY means "still in flight". */
	return busy != -EBUSY;
}
309
/* Age the DMA buffer lists after a command-stream flush:
 *   1. wait -> free    once the GPU is done with a buffer,
 *   2. reserved -> wait so just-submitted buffers get GPU time before reuse,
 *   3. free buffers that sat unused for DMA_BO_FREE_TIME generations.
 * Buffers smaller than the current minimum_size are dropped along the way.
 */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	/* One "generation" per call: a buffer entering a list now expires at
	   expire_counter + DMA_BO_FREE_TIME; 'time' is this call's stamp. */
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		/* NOTE(review): %zu expects size_t but minimum_size is
		   compared against plain int elsewhere in this file — confirm
		   the field's type matches the format specifier. */
		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}

	/* The current buffer stays mapped while in use; unmap before it is
	   recycled below. */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		/* expire_counter wrapped all the way around to this call's
		   stamp: the buffer never went idle — give up on it. */
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
		   radeon_bo_unref(dma_bo->bo);
		   remove_from_list(dma_bo);
		   FREE(dma_bo);
		   continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo)) {
			/* NOTE(review): on DRI2 the loop stops at the first
			   busy buffer — presumably buffers retire in list
			   order there; legacy keeps scanning. Confirm. */
			if (rmesa->radeonScreen->driScreen->dri2.enabled)
				break;
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* free objects that are too small to be used because of large request */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
		   radeon_bo_unref(dma_bo->bo);
		   remove_from_list(dma_bo);
		   FREE(dma_bo);
		   continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		/* List is ordered oldest-first, so stop at the first entry
		   that has not yet reached this call's stamp. */
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
	        radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

}
393
394
/* Flush vertices in the current dma region.
 *
 * Emits any vertices buffered since the last flush via the per-driver
 * swtcl_flush hook, then drops this code's reference to the swtcl BO.
 * Installed as rmesa->dma.flush by rcommonAllocDmaLowVerts; clears itself
 * so it only runs once per batch of buffered vertices.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	/* NOTE(review): assumes swtcl.bo is non-NULL here — dma.flush is only
	   installed after the BO is created in rcommonAllocDmaLowVerts;
	   confirm no other path sets dma.flush to this function. */
	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
	    GLuint current_offset = dma->current_used;

	    /* All buffered vertices must sit contiguously between
	       current_used and current_vertexptr. */
	    assert (dma->current_used +
		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		    dma->current_vertexptr);

	    if (dma->current_used != dma->current_vertexptr) {
		    /* Consume the vertex range, then hand the draw off to the
		       chip-specific backend. */
		    dma->current_used = dma->current_vertexptr;

		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
	    }
	    rmesa->swtcl.numverts = 0;
	}
	/* Drop the reference taken when the BO was attached to swtcl. */
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}
/* Alloc space in the current dma region.
 *
 * Reserves room for 'nverts' vertices of 'vsize' bytes each in the current
 * DMA buffer and returns a pointer to the mapped space.  Returns NULL when
 * a new buffer had to be brought in — the caller must retry, re-emitting
 * any per-buffer state first.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	/* No current buffer, or not enough room left in it: flush pending
	   vertices, pull in a fresh buffer and tell the caller to retry. */
	if(is_empty_list(&rmesa->dma.reserved)
	      ||rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

                radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

        if (!rmesa->dma.flush) {
		/* if cmdbuf flushed DMA restart */
                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
        }

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
        ASSERT( rmesa->dma.current_used +
                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                rmesa->dma.current_vertexptr );

	/* First vertex in this buffer: take a reference and keep the BO
	   mapped until rcommon_flush_last_swtcl_prim releases it. */
	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
469
470void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
471{
472   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
473   int i;
474	if (RADEON_DEBUG & RADEON_IOCTL)
475		fprintf(stderr, "%s\n", __FUNCTION__);
476
477   if (radeon->dma.flush) {
478       radeon->dma.flush(radeon->glCtx);
479   }
480   for (i = 0; i < radeon->tcl.aos_count; i++) {
481      if (radeon->tcl.aos[i].bo) {
482         radeon_bo_unref(radeon->tcl.aos[i].bo);
483         radeon->tcl.aos[i].bo = NULL;
484
485      }
486   }
487}
488