radeon_dma.c revision 66e019c6c91e6ae3fb9e26a12d7b7782a0095a8d
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include "radeon_common.h"
#include "main/simple_list.h"
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif

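/* The radeonEmitVec{4,8,12,16} helpers gather 'count' elements of 4, 8, 12
 * or 16 bytes from a source array with the given byte stride into a tightly
 * packed destination.  When the source is already packed (stride equals the
 * element size) they fall through to a single bulk copy.
 */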
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

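/* Upload one vertex attribute array into a freshly allocated DMA region and
 * describe it in *aos.  'size' is the number of dwords per element; a stride
 * of 0 uploads a single element (used for constant attributes).  A sketch of
 * a typical call (the attribute pointer and counts are illustrative):
 *
 *   rcommon_emit_vector(ctx, &rmesa->tcl.aos[0], attrib_ptr, 4, 16, nverts);
 */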
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

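/* Set up the three DMA buffer-object lists: 'free' (idle, reusable), 'wait'
 * (possibly still referenced by the GPU) and 'reserved' (currently in use).
 */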
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

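/* Retire the current reserved buffer and make a buffer that can hold at
 * least 'size' bytes current: reuse one from the free list when it is big
 * enough, otherwise allocate a fresh BO in GTT (flushing the command buffer
 * and retrying if the allocation fails).
 */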
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo;

	/* Bump the minimum buffer size to at least the requested size,
	   rounded up to the next 16-byte boundary. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (rmesa->dma.flush) {
		rmesa->dma.flush(rmesa->glCtx);
	}

	/* unmap the old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
again_alloc:
		dma_bo = CALLOC(sizeof(struct radeon_dma_bo));
		assert(dma_bo);

		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			/* Allocation failed: flush queued commands to free
			   up memory, then retry. */
			FREE(dma_bo);
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed while revalidating and the
		   reserved buffer was released; allocate a fresh one.  The
		   label sits before the CALLOC, so this jump cannot reach
		   radeon_bo_open with an uninitialized dma_bo. */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate a region from the currently reserved DMA buffer.  If there isn't
 * enough space left, grab a new buffer (and discard what was left of the
 * current one).
 */
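/* A sketch of typical use (the byte count is illustrative); the region comes
 * back mapped and referenced, so the caller drops its reference when done:
 *
 *   struct radeon_bo *bo;
 *   int offset;
 *   radeonAllocDmaRegion(rmesa, &bo, &offset, count * 4, 32);
 *   memcpy((char *)bo->ptr + offset, src, count * 4);
 *   radeon_bo_unref(bo);
 */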
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

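/* Context teardown: destroy every buffer object on the free, wait and
 * reserved lists.
 */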
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

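/* Give back the unused tail of the current region, e.g. when fewer bytes
 * were emitted than were allocated for it.
 */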
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

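/* With a reference count of 1 only the DMA lists hold the BO; anything
 * still in flight would hold its own reference, so the GPU is presumed
 * done with it.
 */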
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	return bo->cref == 1;
}

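/* Per-release housekeeping for the DMA lists: reserved buffers move to the
 * wait list, idle wait-list buffers move to the free list, and free-list
 * buffers that have gone unused for DMA_BO_FREE_TIME release cycles are
 * destroyed.
 */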
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	/* Move waiting bos to the free list.  The wait list gives the GPU
	   time to finish with the data before a buffer is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are too small to satisfy the larger
		   requests we are now seeing. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* Unmap the last dma region. */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
	/* Move reserved bos to the wait list. */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that are too small to satisfy the larger
		   requests we are now seeing. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* Free bos that have been unused for some time. */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}

/* Alloc space for 'nverts' vertices of 'vsize' bytes each in the current
 * dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

	if (!rmesa->dma.flush) {
		/* Make sure we have enough space to use this in the cmdbuf. */
		rcommonEnsureCmdBufSpace(rmesa,
					 rmesa->hw.max_state_size + (20*sizeof(int)),
					 __FUNCTION__);
		/* If flushing the cmdbuf released the DMA region, restart. */
		if (is_empty_list(&rmesa->dma.reserved))
			goto restart;
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

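/* Flush any pending swtcl vertices and drop the references on the AOS
 * buffers that rcommon_emit_vector uploaded.  'newinputs' is unused here.
 */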
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}