radeon_dma.c revision c3380ded10200f2df0cfba4abbe9a9eb892f7cbb
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include "radeon_common.h"

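/* Copy 'nr' 32-bit words from 'src' to 'dst', leaving 'dst' advanced past
 * the copied data ('src' itself is not updated).  The x86 path does it with
 * a single "rep ; movsl"; the portable fallback is a plain dword loop.
 */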
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif

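/* The radeonEmitVec{4,8,12,16} helpers gather a vertex attribute of 1-4
 * dwords per element from client memory (with an arbitrary byte stride)
 * into the tightly packed DMA buffer at 'out'.  When the stride already
 * matches the element size the data is contiguous and is copied with a
 * single COPY_DWORDS.
 */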
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

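/* Upload one vertex array into a freshly allocated DMA region and fill in
 * the radeon_aos descriptor.  'size' is the number of dwords per element;
 * a stride of 0 means a constant attribute, so only a single element is
 * emitted and aos->stride is left at 0.
 */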
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

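/* Throw away what is left of the current DMA buffer and open a fresh GTT
 * buffer object of at least MAX_DMA_BUF_SZ bytes.  Pending vertices are
 * flushed first, and if the allocation or the command-stream space check
 * fails, the command buffer is flushed and the allocation is retried.
 */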
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{

	size = MAX2(size, MAX_DMA_BUF_SZ);

	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (rmesa->dma.flush) {
		rmesa->dma.flush(rmesa->glCtx);
	}

	if (rmesa->dma.nr_released_bufs > 4) {
		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
		rmesa->dma.nr_released_bufs = 0;
	}

	if (rmesa->dma.current) {
		radeon_bo_unmap(rmesa->dma.current);
		radeon_bo_unref(rmesa->dma.current);
		rmesa->dma.current = 0;
	}

again_alloc:
	rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, size, 4, RADEON_GEM_DOMAIN_GTT,
					    0);

	if (!rmesa->dma.current) {
		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
		rmesa->dma.nr_released_bufs = 0;
		goto again_alloc;
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  rmesa->dma.current,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (!rmesa->dma.current) {
		/* The command buffer was flushed while revalidating the BOs,
		 * which released the new buffer; allocate again. */
		rmesa->dma.nr_released_bufs = 0;
		goto again_alloc;
	}

	radeon_bo_map(rmesa->dma.current, 1);
}

/* Allocate a region from rmesa->dma.current.  If there isn't enough
 * space left in the current buffer, grab a new one (and discard whatever
 * was left of the old one).
 */
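/* The returned buffer object is referenced on behalf of the caller, so the
 * caller owns one reference and must radeon_bo_unref() it when done.  An
 * illustrative sketch only (see rcommon_emit_vector() above for a real
 * caller):
 *
 *	struct radeon_bo *bo;
 *	int offset;
 *	radeonAllocDmaRegion(rmesa, &bo, &offset, 4 * count, 32);
 *	... write 'count' dwords at (char *)bo->ptr + offset ...
 *	radeon_bo_unref(bo);
 */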
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
		radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);

	*poffset = rmesa->dma.current_used;
	*pbo = rmesa->dma.current;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= rmesa->dma.current->size);
}

void radeonReleaseDmaRegion(radeonContextPtr rmesa)
{
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current);
	if (rmesa->dma.current) {
		rmesa->dma.nr_released_bufs++;
		radeon_bo_unmap(rmesa->dma.current);
		radeon_bo_unref(rmesa->dma.current);
	}
	rmesa->dma.current = NULL;
}

/* Flush vertices in the current dma region.
 */
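/* If any vertices have been emitted since the last flush (current_used !=
 * current_vertexptr), hand them to the hardware backend via
 * rmesa->vtbl.swtcl_flush() and reset the software-TCL vertex count.
 */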
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current);
	dma->flush = NULL;

	if (dma->current) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}
/* Alloc space in the current dma region.
 */
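/* Returns a pointer to space for 'nverts' vertices of 'vsize' bytes each,
 * refilling the DMA region if necessary.  On the first allocation after a
 * flush it also reserves command-buffer space for the eventual draw and
 * installs rcommon_flush_last_swtcl_prim as the deferred flush callback.
 */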
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
		radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

	if (!rmesa->dma.flush) {
		/* make sure we have enough space to use this in the cmdbuf */
		rcommonEnsureCmdBufSpace(rmesa,
					 rmesa->hw.max_state_size + (20*sizeof(int)),
					 __FUNCTION__);
		/* if the cmdbuf flush released the DMA buffer, restart */
		if (!rmesa->dma.current)
			goto restart;
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

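/* Release the buffer objects referenced by the TCL element buffer and the
 * array-of-structures descriptors, flushing any pending software-TCL
 * vertices first.  The 'newinputs' mask is not examined here.
 */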
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
   int i;

   if (radeon->dma.flush) {
      radeon->dma.flush(radeon->glCtx);
   }
   if (radeon->tcl.elt_dma_bo) {
      radeon_bo_unref(radeon->tcl.elt_dma_bo);
      radeon->tcl.elt_dma_bo = NULL;
   }
   for (i = 0; i < radeon->tcl.aos_count; i++) {
      if (radeon->tcl.aos[i].bo) {
         radeon_bo_unref(radeon->tcl.aos[i].bo);
         radeon->tcl.aos[i].bo = NULL;
      }
   }
}