/* radeon_dma.c — revision f9b8562f32b77a27c872d4c70a86995032541107 */
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/
32
33#include "radeon_common.h"
34#include "main/simple_list.h"
35
#if defined(USE_X86_ASM)
/* Copy 'nr' 32-bit words from 'src' to 'dst' with the x86 string-move
 * instruction ("rep movsl").  Side effect: 'dst' is left advanced past the
 * copied words (movsl increments %edi, written back through the "=D" output).
 * NOTE(review): the first output constraint "=%c" carries a '%' (commutative)
 * modifier on an output, and __tmp is listed twice (for the clobbered %ecx
 * and %esi) — both registers are dead after the copy, but verify this
 * assembles as intended on current compilers.
 */
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
/* Portable fallback: copy 'nr' 32-bit words and advance 'dst' past them,
 * matching the side effect of the assembly version above. */
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif
55
56void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
57{
58	int i;
59
60	if (RADEON_DEBUG & DEBUG_VERTS)
61		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
62			__FUNCTION__, count, stride, (void *)out, (void *)data);
63
64	if (stride == 4)
65		COPY_DWORDS(out, data, count);
66	else
67		for (i = 0; i < count; i++) {
68			out[0] = *(int *)data;
69			out++;
70			data += stride;
71		}
72}
73
74void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
75{
76	int i;
77
78	if (RADEON_DEBUG & DEBUG_VERTS)
79		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
80			__FUNCTION__, count, stride, (void *)out, (void *)data);
81
82	if (stride == 8)
83		COPY_DWORDS(out, data, count * 2);
84	else
85		for (i = 0; i < count; i++) {
86			out[0] = *(int *)data;
87			out[1] = *(int *)(data + 4);
88			out += 2;
89			data += stride;
90		}
91}
92
93void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
94{
95	int i;
96
97	if (RADEON_DEBUG & DEBUG_VERTS)
98		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
99			__FUNCTION__, count, stride, (void *)out, (void *)data);
100
101	if (stride == 12) {
102		COPY_DWORDS(out, data, count * 3);
103    }
104	else
105		for (i = 0; i < count; i++) {
106			out[0] = *(int *)data;
107			out[1] = *(int *)(data + 4);
108			out[2] = *(int *)(data + 8);
109			out += 3;
110			data += stride;
111		}
112}
113
114void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
115{
116	int i;
117
118	if (RADEON_DEBUG & DEBUG_VERTS)
119		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
120			__FUNCTION__, count, stride, (void *)out, (void *)data);
121
122	if (stride == 16)
123		COPY_DWORDS(out, data, count * 4);
124	else
125		for (i = 0; i < count; i++) {
126			out[0] = *(int *)data;
127			out[1] = *(int *)(data + 4);
128			out[2] = *(int *)(data + 8);
129			out[3] = *(int *)(data + 12);
130			out += 4;
131			data += stride;
132		}
133}
134
135void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
136			 const GLvoid * data, int size, int stride, int count)
137{
138	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
139	uint32_t *out;
140
141	if (stride == 0) {
142		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
143		count = 1;
144		aos->stride = 0;
145	} else {
146		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
147		aos->stride = size;
148	}
149
150	aos->components = size;
151	aos->count = count;
152
153	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
154	switch (size) {
155	case 1: radeonEmitVec4(out, data, stride, count); break;
156	case 2: radeonEmitVec8(out, data, stride, count); break;
157	case 3: radeonEmitVec12(out, data, stride, count); break;
158	case 4: radeonEmitVec16(out, data, stride, count); break;
159	default:
160		assert(0);
161		break;
162	}
163}
164
/* Initialize the three DMA buffer lists (free, wait, reserved) to empty.
 * Called once at context creation, before any DMA allocation. */
void radeon_init_dma(radeonContextPtr rmesa)
{
   make_empty_list(&rmesa->dma.free);
   make_empty_list(&rmesa->dma.wait);
   make_empty_list(&rmesa->dma.reserved);
}
171
172void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
173{
174	size = MAX2(size, MAX_DMA_BUF_SZ);
175
176	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
177		fprintf(stderr, "%s\n", __FUNCTION__);
178
179	if (rmesa->dma.flush) {
180		rmesa->dma.flush(rmesa->glCtx);
181	}
182
183	/* unmap old reserved bo */
184	if (!is_empty_list(&rmesa->dma.reserved))
185		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
186
187	if (is_empty_list(&rmesa->dma.free)) {
188		struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo));
189		assert(dma_bo);
190
191again_alloc:
192		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
193					    0, size, 4, RADEON_GEM_DOMAIN_GTT,
194					    0);
195
196		if (!dma_bo->bo) {
197			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
198			goto again_alloc;
199		}
200		insert_at_head(&rmesa->dma.reserved, dma_bo);
201	} else {
202		struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free);
203		assert(dma_bo->bo->cref == 1);
204		remove_from_list(dma_bo);
205		insert_at_head(&rmesa->dma.reserved, dma_bo);
206	}
207
208	rmesa->dma.current_used = 0;
209	rmesa->dma.current_vertexptr = 0;
210
211	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
212					  first_elem(&rmesa->dma.reserved)->bo,
213					  RADEON_GEM_DOMAIN_GTT, 0))
214		fprintf(stderr,"failure to revalidate BOs - badness\n");
215
216	if (is_empty_list(&rmesa->dma.reserved)) {
217        /* Cmd buff have been flushed in radeon_revalidate_bos */
218		goto again_alloc;
219	}
220
221	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
222}
223
/* Allocates a region from rmesa->dma.current.  If there isn't enough
 * space in current, grab a new buffer (and discard what was left of current)
 *
 * On return *pbo holds a new reference (caller must unref) to the reserved
 * buffer and *poffset the byte offset of the allocation within it.
 * 'alignment' must be a power of two (enforced only by the mask arithmetic
 * below, not checked).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	/* Flush buffered swtcl vertices first so they land before this region. */
	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	/* Round the current offset up to the requested alignment
	 * (power-of-two mask trick). */
	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	/* No reserved buffer, or not enough room left in it: get a fresh one. */
	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, (bytes + 15) & ~15);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}
256
257void radeonFreeDmaRegions(radeonContextPtr rmesa)
258{
259	struct radeon_dma_bo *dma_bo;
260	struct radeon_dma_bo *temp;
261	if (RADEON_DEBUG & DEBUG_IOCTL)
262		fprintf(stderr, "%s\n", __FUNCTION__);
263
264	foreach_s(dma_bo, temp, &rmesa->dma.free) {
265		remove_from_list(dma_bo);
266	        radeon_bo_unref(dma_bo->bo);
267		FREE(dma_bo);
268	}
269
270	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
271		remove_from_list(dma_bo);
272		radeon_bo_unmap(dma_bo->bo);
273	        radeon_bo_unref(dma_bo->bo);
274		FREE(dma_bo);
275	}
276}
277
/* Give back the unused tail of the most recent DMA allocation, shrinking
 * the current-used watermark by 'return_bytes'.  No-op when there is no
 * reserved buffer. */
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}
288
/* Rotate DMA buffers through the reuse pipeline:
 *   reserved -> wait -> free -> (expired) released.
 * The wait list gives the GPU time to finish reading a buffer before it is
 * recycled; a buffer is promoted from wait to free once its reference count
 * drops to 1 (only we hold it), and freed outright once its expire counter
 * reaches the current tick (it sat unused for DMA_BO_FREE_TIME releases). */
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	/* Advance the clock by one tick; buffers queued now expire at
	 * time + DMA_BO_FREE_TIME. */
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	/* move waiting bos to free list.
	   wait list provides gpu time to handle data before reuse */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		/* A wait-list entry whose expire stamp caught up with the
		 * clock is still externally referenced after a full cycle —
		 * drop our reference and report the leak. */
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Still referenced by a command stream in flight; keep waiting. */
		if (dma_bo->bo->cref > 1)
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
	/* move reserved to wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free bos that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		/* The free list is ordered by expire time, so stop at the
		 * first entry that has not yet expired. */
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
	        radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

}
335
336
/* Flush vertices in the current dma region.
 *
 * Installed as rmesa->dma.flush by rcommonAllocDmaLowVerts; advances the
 * used watermark over the buffered swtcl vertices and hands them to the
 * hardware-specific swtcl_flush.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx  )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;


	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	/* Clear the hook first so re-entrant calls triggered below are no-ops. */
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
	    GLuint current_offset = dma->current_used;

	    /* All vertices emitted since the last flush must sit between
	     * current_used and current_vertexptr. */
	    assert (dma->current_used +
		    rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		    dma->current_vertexptr);

	    if (dma->current_used != dma->current_vertexptr) {
		    dma->current_used = dma->current_vertexptr;

		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
	    }
	    rmesa->swtcl.numverts = 0;
	}
}
/* Alloc space in the current dma region.
 *
 * Returns a CPU pointer to space for 'nverts' vertices of 'vsize' bytes
 * each inside the mapped reserved buffer, and registers
 * rcommon_flush_last_swtcl_prim as the deferred flush for them.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	/* Not enough room in the current buffer: swap in a fresh one. */
	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
                radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

        if (!rmesa->dma.flush) {
		/* make sure we have enough space to use this in cmdbuf */
   		rcommonEnsureCmdBufSpace(rmesa,
			      rmesa->hw.max_state_size + (20*sizeof(int)),
			      __FUNCTION__);
		/* if cmdbuf flushed DMA restart */
		if (is_empty_list(&rmesa->dma.reserved))
			goto restart;
                rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
                rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
        }

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
        ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
        ASSERT( rmesa->dma.current_used +
                rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
                rmesa->dma.current_vertexptr );

	/* Reserve the space by bumping the vertex watermark; the pending
	 * range is flushed later by rcommon_flush_last_swtcl_prim. */
	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}
402
403void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
404{
405   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
406   int i;
407	if (RADEON_DEBUG & DEBUG_IOCTL)
408		fprintf(stderr, "%s\n", __FUNCTION__);
409
410   if (radeon->dma.flush) {
411       radeon->dma.flush(radeon->glCtx);
412   }
413   for (i = 0; i < radeon->tcl.aos_count; i++) {
414      if (radeon->tcl.aos[i].bo) {
415         radeon_bo_unref(radeon->tcl.aos[i].bo);
416         radeon->tcl.aos[i].bo = NULL;
417
418      }
419   }
420}
421