radeon_dma.c revision cf24119d38360bfb25fa2683fe86a139826084f0
1cf24119d38360bfb25fa2683fe86a139826084f0Michel Dänzer/**************************************************************************
2
3Copyright (C) 2004 Nicolai Haehnle.
4Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
5
6The Weather Channel (TM) funded Tungsten Graphics to develop the
7initial release of the Radeon 8500 driver under the XFree86 license.
8This notice must be preserved.
9
10All Rights Reserved.
11
12Permission is hereby granted, free of charge, to any person obtaining a
13copy of this software and associated documentation files (the "Software"),
14to deal in the Software without restriction, including without limitation
15on the rights to use, copy, modify, merge, publish, distribute, sub
16license, and/or sell copies of the Software, and to permit persons to whom
17the Software is furnished to do so, subject to the following conditions:
18
19The above copyright notice and this permission notice (including the next
20paragraph) shall be included in all copies or substantial portions of the
21Software.
22
23THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
24IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
25FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
26ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
27DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
28OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
29USE OR OTHER DEALINGS IN THE SOFTWARE.
30
31**************************************************************************/
32
33#include "radeon_common.h"
34
/*
 * COPY_DWORDS(dst, src, nr): copy `nr` 32-bit words from `src` to `dst`.
 *
 * `dst` must be a modifiable pointer lvalue.  The portable variant advances
 * it by `nr` elements; the x86 variant writes the final destination register
 * back into it.  `src` is not modified by either variant.
 */
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	/* "memory" clobber: the string copy reads and writes memory	\
	 * that is not described by any of the operands. */		\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)(src))			\
			      : "memory" );				\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	/* Evaluate src and nr exactly once, fully parenthesized, so	\
	 * that expression arguments keep their intended meaning. */	\
	const int *cd_src_ = (const int *)(src);			\
	const int cd_nr_ = (nr);					\
	int cd_i_;							\
	for ( cd_i_ = 0 ; cd_i_ < cd_nr_ ; cd_i_++ )			\
		(dst)[cd_i_] = cd_src_[cd_i_];				\
	(dst) += cd_nr_;						\
} while (0)
#endif
54
55static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
56{
57	int i;
58
59	if (RADEON_DEBUG & DEBUG_VERTS)
60		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
61			__FUNCTION__, count, stride, (void *)out, (void *)data);
62
63	if (stride == 4)
64		COPY_DWORDS(out, data, count);
65	else
66		for (i = 0; i < count; i++) {
67			out[0] = *(int *)data;
68			out++;
69			data += stride;
70		}
71}
72
73void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
74{
75	int i;
76
77	if (RADEON_DEBUG & DEBUG_VERTS)
78		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
79			__FUNCTION__, count, stride, (void *)out, (void *)data);
80
81	if (stride == 8)
82		COPY_DWORDS(out, data, count * 2);
83	else
84		for (i = 0; i < count; i++) {
85			out[0] = *(int *)data;
86			out[1] = *(int *)(data + 4);
87			out += 2;
88			data += stride;
89		}
90}
91
92void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
93{
94	int i;
95
96	if (RADEON_DEBUG & DEBUG_VERTS)
97		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
98			__FUNCTION__, count, stride, (void *)out, (void *)data);
99
100	if (stride == 12) {
101		COPY_DWORDS(out, data, count * 3);
102    }
103	else
104		for (i = 0; i < count; i++) {
105			out[0] = *(int *)data;
106			out[1] = *(int *)(data + 4);
107			out[2] = *(int *)(data + 8);
108			out += 3;
109			data += stride;
110		}
111}
112
113static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
114{
115	int i;
116
117	if (RADEON_DEBUG & DEBUG_VERTS)
118		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
119			__FUNCTION__, count, stride, (void *)out, (void *)data);
120
121	if (stride == 16)
122		COPY_DWORDS(out, data, count * 4);
123	else
124		for (i = 0; i < count; i++) {
125			out[0] = *(int *)data;
126			out[1] = *(int *)(data + 4);
127			out[2] = *(int *)(data + 8);
128			out[3] = *(int *)(data + 12);
129			out += 4;
130			data += stride;
131		}
132}
133
134void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
135			 GLvoid * data, int size, int stride, int count)
136{
137	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
138	uint32_t *out;
139
140	if (stride == 0) {
141		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
142		count = 1;
143		aos->stride = 0;
144	} else {
145		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
146		aos->stride = size;
147	}
148
149	aos->components = size;
150	aos->count = count;
151
152	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
153	switch (size) {
154	case 1: radeonEmitVec4(out, data, stride, count); break;
155	case 2: radeonEmitVec8(out, data, stride, count); break;
156	case 3: radeonEmitVec12(out, data, stride, count); break;
157	case 4: radeonEmitVec16(out, data, stride, count); break;
158	default:
159		assert(0);
160		break;
161	}
162}
163
164void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
165{
166
167	size = MAX2(size, MAX_DMA_BUF_SZ);
168
169	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
170		fprintf(stderr, "%s\n", __FUNCTION__);
171
172	if (rmesa->dma.flush) {
173		rmesa->dma.flush(rmesa->glCtx);
174	}
175
176	if (rmesa->dma.nr_released_bufs > 4) {
177		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
178		rmesa->dma.nr_released_bufs = 0;
179	}
180
181	if (rmesa->dma.current) {
182		radeon_bo_unmap(rmesa->dma.current);
183		radeon_bo_unref(rmesa->dma.current);
184		rmesa->dma.current = 0;
185	}
186
187again_alloc:
188#ifdef RADEON_DEBUG_BO
189    rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
190					    0, size, 4, RADEON_GEM_DOMAIN_GTT,
191					    0, "dma.current");
192#else
193	rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
194					    0, size, 4, RADEON_GEM_DOMAIN_GTT,
195					    0);
196#endif /* RADEON_DEBUG_BO */
197
198	if (!rmesa->dma.current) {
199		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
200		rmesa->dma.nr_released_bufs = 0;
201		goto again_alloc;
202	}
203
204	rmesa->dma.current_used = 0;
205	rmesa->dma.current_vertexptr = 0;
206
207	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
208					  rmesa->dma.current,
209					  RADEON_GEM_DOMAIN_GTT, 0))
210		fprintf(stderr,"failure to revalidate BOs - badness\n");
211
212	if (!rmesa->dma.current) {
213        /* Cmd buff have been flushed in radeon_revalidate_bos */
214		rmesa->dma.nr_released_bufs = 0;
215		goto again_alloc;
216	}
217
218	radeon_bo_map(rmesa->dma.current, 1);
219}
220
221/* Allocates a region from rmesa->dma.current.  If there isn't enough
222 * space in current, grab a new buffer (and discard what was left of current)
223 */
224void radeonAllocDmaRegion(radeonContextPtr rmesa,
225			  struct radeon_bo **pbo, int *poffset,
226			  int bytes, int alignment)
227{
228	if (RADEON_DEBUG & DEBUG_IOCTL)
229		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
230
231	if (rmesa->dma.flush)
232		rmesa->dma.flush(rmesa->glCtx);
233
234	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
235
236	alignment--;
237	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
238
239	if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
240		radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
241
242	*poffset = rmesa->dma.current_used;
243	*pbo = rmesa->dma.current;
244	radeon_bo_ref(*pbo);
245
246	/* Always align to at least 16 bytes */
247	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
248	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
249
250	assert(rmesa->dma.current_used <= rmesa->dma.current->size);
251}
252
253void radeonReleaseDmaRegion(radeonContextPtr rmesa)
254{
255	if (RADEON_DEBUG & DEBUG_IOCTL)
256		fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current);
257	if (rmesa->dma.current) {
258		rmesa->dma.nr_released_bufs++;
259		radeon_bo_unmap(rmesa->dma.current);
260	        radeon_bo_unref(rmesa->dma.current);
261	}
262	rmesa->dma.current = NULL;
263}
264
265
266/* Flush vertices in the current dma region.
267 */
268void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
269{
270	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
271	struct radeon_dma *dma = &rmesa->dma;
272
273
274	if (RADEON_DEBUG & DEBUG_IOCTL)
275		fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current);
276	dma->flush = NULL;
277
278	if (dma->current) {
279	    GLuint current_offset = dma->current_used;
280
281	    assert (dma->current_used +
282		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
283		    dma->current_vertexptr);
284
285	    if (dma->current_used != dma->current_vertexptr) {
286		    dma->current_used = dma->current_vertexptr;
287
288		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
289	    }
290	    rmesa->swtcl.numverts = 0;
291	}
292}
293/* Alloc space in the current dma region.
294 */
295void *
296rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
297{
298	GLuint bytes = vsize * nverts;
299	void *head;
300restart:
301	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
302                radeonRefillCurrentDmaRegion(rmesa, bytes);
303	}
304
305        if (!rmesa->dma.flush) {
306		/* make sure we have enough space to use this in cmdbuf */
307   		rcommonEnsureCmdBufSpace(rmesa,
308			      rmesa->hw.max_state_size + (20*sizeof(int)),
309			      __FUNCTION__);
310		/* if cmdbuf flushed DMA restart */
311		if (!rmesa->dma.current)
312			goto restart;
313                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
314                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
315        }
316
317	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
318        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
319        ASSERT( rmesa->dma.current_used +
320                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
321                rmesa->dma.current_vertexptr );
322
323	head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
324	rmesa->dma.current_vertexptr += bytes;
325	rmesa->swtcl.numverts += nverts;
326	return head;
327}
328
329void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
330{
331   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
332   int i;
333
334   if (radeon->dma.flush) {
335       radeon->dma.flush(radeon->glCtx);
336   }
337   if (radeon->tcl.elt_dma_bo) {
338	   radeon_bo_unref(radeon->tcl.elt_dma_bo);
339	   radeon->tcl.elt_dma_bo = NULL;
340   }
341   for (i = 0; i < radeon->tcl.aos_count; i++) {
342      if (radeon->tcl.aos[i].bo) {
343         radeon_bo_unref(radeon->tcl.aos[i].bo);
344         radeon->tcl.aos[i].bo = NULL;
345      }
346   }
347}
348