radeon_dma.c revision 1c28073fdfb56a241424c739b57845f47fa05002
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

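/* Copy `nr` 32-bit words from src to dst.  Both variants advance dst
 * past the copied words: the asm version through its "=D" output, the
 * C fallback through the explicit `dst += nr`.
 */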
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
	int j;					\
	for ( j = 0 ; j < nr ; j++ )		\
		dst[j] = ((int *)src)[j];	\
	dst += nr;				\
} while (0)
#endif

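/* radeonEmitVec{4,8,12,16} copy `count` elements of 4/8/12/16 bytes
 * from a source array with byte stride `stride` into a tightly packed
 * output buffer, taking the COPY_DWORDS fast path when the source is
 * already packed.
 */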
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

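/* Upload a (possibly strided) vertex attribute array into a freshly
 * allocated DMA region and fill in the array-of-structures descriptor.
 * A stride of 0 denotes a constant attribute, which is uploaded once.
 */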
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	radeon_bo_map(aos->bo, 1);
	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
	radeon_bo_unmap(aos->bo);
}

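/* Initialize the three DMA buffer lists (free, wait, reserved) and the
 * default minimum allocation size.
 */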
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

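/* Make a buffer of at least `size` bytes the current DMA region: reuse
 * the most recently freed buffer when it is big enough, otherwise open
 * a new BO in the GTT domain, flushing the command buffer and retrying
 * if the allocation fails.
 */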
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* Raise the minimum size to at least the requested size,
	   rounded up to the next multiple of 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %d\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers at the end of the list, so we can
		   keep a count of unused buffers and later free them from
		   the beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr,"failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed in radeon_revalidate_bos,
		   emptying the reserved list, so allocate a fresh buffer. */
		goto again_alloc;
	}
}

/* Allocate a region from the current DMA buffer (the head of
 * dma.reserved).  If there isn't enough space left, grab a new buffer
 * (and discard what was left of the current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

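/* A sketch of typical usage of the allocator (hypothetical caller; it
 * mirrors what rcommon_emit_vector does above):
 *
 *	struct radeon_bo *bo;
 *	int offset;
 *
 *	radeonAllocDmaRegion(rmesa, &bo, &offset, bytes, 32);
 *	radeon_bo_map(bo, 1);
 *	memcpy((char *)bo->ptr + offset, data, bytes);
 *	radeon_bo_unmap(bo);
 *	radeon_bo_unref(bo);
 */

/* Unreference and free every buffer on all three lists, typically at
 * context teardown.
 */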
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

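/* Give back the unused tail of the current DMA region, e.g. when fewer
 * bytes were consumed than were allocated.
 */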
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

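/* Returns nonzero when the GPU is no longer using the BO.  A kernel or
 * libdrm without busy-query support (-EINVAL) is treated as idle.
 */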
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't support the busy query.\n"
			"This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

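/* Age the DMA buffers: reserved BOs move to the wait list, idle waiting
 * BOs move to the free list, and free BOs that have not been reused for
 * DMA_BO_FREE_TIME ticks are destroyed.  expire_counter serves as a
 * coarse timestamp for this aging.
 */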
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}
	/* Move waiting BOs to the free list.  The wait list gives the GPU
	   time to finish with the data before a buffer is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that have become too small to be reused
		   because a larger request raised the minimum size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that have become too small to be reused
		   because a larger request raised the minimum size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush the vertices accumulated in the current DMA region and release
 * the swtcl scratch BO.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}

/* Allocate space for nverts vertices of vsize bytes each in the current
 * DMA region.  Returns NULL when the region had to be refilled, in
 * which case the caller is expected to retry the allocation.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* A cmdbuf flush cleared the callback; re-arm the DMA flush
		   before accumulating more vertices. */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}
	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

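/* Flush any pending vertices and drop the references that the TCL
 * array-of-structures descriptors still hold on their BOs.
 */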
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}
478