/* radeon_dma.c — revision 1386e8a6ba9732c578e0122de245abfd578a7d1d */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
32
33#include "radeon_common.h"
34
/*
 * COPY_DWORDS(dst, src, nr): copy `nr` 32-bit words from `src` to `dst`,
 * leaving `dst` advanced past the last word written.
 */
#if defined(USE_X86_ASM)
/* x86 fast path: a single "rep ; movsl" performs the whole copy and
 * leaves %edi (dst) pointing past the copied data.
 * NOTE(review): both the %ecx and %esi outputs are tied to the same
 * dummy __tmp, and the "=%c" constraint carries a stray '%'.  This
 * matches the historical driver code, but verify against the GCC
 * extended-asm constraint documentation before modifying. */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
/* Portable fallback: plain dword-copy loop; dst is advanced afterwards
 * to mirror the asm version's side effect. */
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif
54
55static void radeonEmitVec4(uint32_t *out, GLvoid * data, int stride, int count)
56{
57	int i;
58
59	if (RADEON_DEBUG & DEBUG_VERTS)
60		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
61			__FUNCTION__, count, stride, (void *)out, (void *)data);
62
63	if (stride == 4)
64		COPY_DWORDS(out, data, count);
65	else
66		for (i = 0; i < count; i++) {
67			out[0] = *(int *)data;
68			out++;
69			data += stride;
70		}
71}
72
73void radeonEmitVec8(uint32_t *out, GLvoid * data, int stride, int count)
74{
75	int i;
76
77	if (RADEON_DEBUG & DEBUG_VERTS)
78		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
79			__FUNCTION__, count, stride, (void *)out, (void *)data);
80
81	if (stride == 8)
82		COPY_DWORDS(out, data, count * 2);
83	else
84		for (i = 0; i < count; i++) {
85			out[0] = *(int *)data;
86			out[1] = *(int *)(data + 4);
87			out += 2;
88			data += stride;
89		}
90}
91
92void radeonEmitVec12(uint32_t *out, GLvoid * data, int stride, int count)
93{
94	int i;
95
96	if (RADEON_DEBUG & DEBUG_VERTS)
97		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
98			__FUNCTION__, count, stride, (void *)out, (void *)data);
99
100	if (stride == 12) {
101		COPY_DWORDS(out, data, count * 3);
102    }
103	else
104		for (i = 0; i < count; i++) {
105			out[0] = *(int *)data;
106			out[1] = *(int *)(data + 4);
107			out[2] = *(int *)(data + 8);
108			out += 3;
109			data += stride;
110		}
111}
112
113static void radeonEmitVec16(uint32_t *out, GLvoid * data, int stride, int count)
114{
115	int i;
116
117	if (RADEON_DEBUG & DEBUG_VERTS)
118		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
119			__FUNCTION__, count, stride, (void *)out, (void *)data);
120
121	if (stride == 16)
122		COPY_DWORDS(out, data, count * 4);
123	else
124		for (i = 0; i < count; i++) {
125			out[0] = *(int *)data;
126			out[1] = *(int *)(data + 4);
127			out[2] = *(int *)(data + 8);
128			out[3] = *(int *)(data + 12);
129			out += 4;
130			data += stride;
131		}
132}
133
134void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
135			 GLvoid * data, int size, int stride, int count)
136{
137	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
138	uint32_t *out;
139
140	if (stride == 0) {
141		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
142		count = 1;
143		aos->stride = 0;
144	} else {
145		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
146		aos->stride = size;
147	}
148
149	aos->components = size;
150	aos->count = count;
151
152	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
153	switch (size) {
154	case 1: radeonEmitVec4(out, data, stride, count); break;
155	case 2: radeonEmitVec8(out, data, stride, count); break;
156	case 3: radeonEmitVec12(out, data, stride, count); break;
157	case 4: radeonEmitVec16(out, data, stride, count); break;
158	default:
159		assert(0);
160		break;
161	}
162}
163
164void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
165{
166	struct radeon_cs_space_check bos[1];
167	int flushed = 0, ret;
168
169	size = MAX2(size, MAX_DMA_BUF_SZ * 16);
170
171	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
172		fprintf(stderr, "%s\n", __FUNCTION__);
173
174	if (rmesa->dma.flush) {
175		rmesa->dma.flush(rmesa->glCtx);
176	}
177
178	if (rmesa->dma.nr_released_bufs > 4) {
179		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
180		rmesa->dma.nr_released_bufs = 0;
181	}
182
183	if (rmesa->dma.current) {
184		radeon_bo_unmap(rmesa->dma.current);
185		radeon_bo_unref(rmesa->dma.current);
186		rmesa->dma.current = 0;
187	}
188
189again_alloc:
190	rmesa->dma.current = radeon_bo_open(rmesa->radeonScreen->bom,
191					    0, size, 4, RADEON_GEM_DOMAIN_GTT,
192					    0);
193
194	if (!rmesa->dma.current) {
195		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
196		rmesa->dma.nr_released_bufs = 0;
197		goto again_alloc;
198	}
199
200	rmesa->dma.current_used = 0;
201	rmesa->dma.current_vertexptr = 0;
202
203	bos[0].bo = rmesa->dma.current;
204	bos[0].read_domains = RADEON_GEM_DOMAIN_GTT;
205	bos[0].write_domain =0 ;
206	bos[0].new_accounted = 0;
207
208	ret = radeon_cs_space_check(rmesa->cmdbuf.cs, bos, 1);
209	if (ret == RADEON_CS_SPACE_OP_TO_BIG) {
210		fprintf(stderr,"Got OPEARTION TO BIG ILLEGAL - this cannot happen");
211		assert(0);
212	} else if (ret == RADEON_CS_SPACE_FLUSH) {
213		rcommonFlushCmdBuf(rmesa, __FUNCTION__);
214		if (flushed) {
215			fprintf(stderr,"flushed but still no space\n");
216			assert(0);
217		}
218		flushed = 1;
219		goto again_alloc;
220	}
221	radeon_bo_map(rmesa->dma.current, 1);
222}
223
224/* Allocates a region from rmesa->dma.current.  If there isn't enough
225 * space in current, grab a new buffer (and discard what was left of current)
226 */
227void radeonAllocDmaRegion(radeonContextPtr rmesa,
228			  struct radeon_bo **pbo, int *poffset,
229			  int bytes, int alignment)
230{
231	if (RADEON_DEBUG & DEBUG_IOCTL)
232		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);
233
234	if (rmesa->dma.flush)
235		rmesa->dma.flush(rmesa->glCtx);
236
237	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);
238
239	alignment--;
240	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;
241
242	if (!rmesa->dma.current || rmesa->dma.current_used + bytes > rmesa->dma.current->size)
243		radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);
244
245	*poffset = rmesa->dma.current_used;
246	*pbo = rmesa->dma.current;
247	radeon_bo_ref(*pbo);
248
249	/* Always align to at least 16 bytes */
250	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
251	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
252
253	assert(rmesa->dma.current_used <= rmesa->dma.current->size);
254}
255
256void radeonReleaseDmaRegion(radeonContextPtr rmesa)
257{
258	if (RADEON_DEBUG & DEBUG_IOCTL)
259		fprintf(stderr, "%s %p\n", __FUNCTION__, rmesa->dma.current);
260	if (rmesa->dma.current) {
261		rmesa->dma.nr_released_bufs++;
262		radeon_bo_unmap(rmesa->dma.current);
263	        radeon_bo_unref(rmesa->dma.current);
264	}
265	rmesa->dma.current = NULL;
266}
267
268
269/* Flush vertices in the current dma region.
270 */
271void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
272{
273	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
274	struct radeon_dma *dma = &rmesa->dma;
275
276
277	if (RADEON_DEBUG & DEBUG_IOCTL)
278		fprintf(stderr, "%s %p\n", __FUNCTION__, dma->current);
279	dma->flush = NULL;
280
281	if (dma->current) {
282	    GLuint current_offset = dma->current_used;
283
284	    assert (dma->current_used +
285		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
286		    dma->current_vertexptr);
287
288	    if (dma->current_used != dma->current_vertexptr) {
289		    dma->current_used = dma->current_vertexptr;
290
291		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
292	    }
293	    rmesa->swtcl.numverts = 0;
294	}
295}
/* Alloc space in the current dma region for `nverts` software-TCL vertices
 * of `vsize` bytes each.  Returns a pointer the caller writes the vertices
 * into; advances the region's vertex pointer and the swtcl vertex count.
 * Installs rcommon_flush_last_swtcl_prim as the pending-flush callback.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	/* Grab a fresh buffer if there is none or the request won't fit. */
	if (!rmesa->dma.current || rmesa->dma.current_vertexptr + bytes > rmesa->dma.current->size) {
                radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

        if (!rmesa->dma.flush) {
		/* make sure we have enough space to use this in cmdbuf */
   		rcommonEnsureCmdBufSpace(rmesa,
			      rmesa->hw.max_state_size + (12*sizeof(int)),
			      __FUNCTION__);
		/* if cmdbuf flushed DMA restart */
		if (!rmesa->dma.current)
			goto restart;
                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
        }

	/* Invariants: vertex size matches swtcl state, our flush callback is
	 * installed, and every vertex since the last flush is accounted for. */
	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
        ASSERT( rmesa->dma.current_used +
                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                rmesa->dma.current_vertexptr );

	/* Hand out the slot and advance the write cursor past it. */
	head = (rmesa->dma.current->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
331