radeon_dma.c revision 66bbafb6f9d44da3baddac6d948ba361182dde2a
/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "main/simple_list.h"

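/* COPY_DWORDS copies 'nr' 32-bit words from 'src' to 'dst' and leaves
 * 'dst' pointing just past the copied data.  On x86 builds this is an
 * inline "rep movsl" string copy; the portable fallback below is a plain
 * loop.
 */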
#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif

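/* radeonEmitVec4/8/12/16 gather one vertex attribute from a (possibly
 * strided) client array into the tightly packed 'out' buffer.  The suffix
 * is the element size in bytes.  When 'stride' equals the element size the
 * source is already packed and a single COPY_DWORDS does the whole copy;
 * otherwise elements are copied one at a time while stepping 'data' by
 * 'stride'.
 */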
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & DEBUG_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

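/* Upload one vertex attribute array into a freshly allocated DMA region
 * and describe its layout in 'aos'.  'size' is the number of dword
 * components per element; a 'stride' of 0 denotes a constant attribute,
 * so only a single element is uploaded and aos->stride stays 0.
 *
 * Illustrative call only ('pos' is a hypothetical tightly packed float4
 * position array, not something defined in this file):
 *
 *   rcommon_emit_vector(ctx, &rmesa->tcl.aos[0], pos, 4, 16, num_verts);
 */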
void rcommon_emit_vector(GLcontext * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
}

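/* Initialize the DMA machinery.  Buffers migrate between three lists:
 * 'reserved' (currently being filled), 'wait' (handed to the GPU, possibly
 * still busy) and 'free' (idle and reusable).
 */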
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

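/* Make a buffer of at least 'size' bytes the current reserved DMA region:
 * flush pending vertices, unmap the old region, then reuse the buffer at
 * the tail of the free list if it is big enough, or open a new buffer
 * object in the GTT domain (retrying after a command-buffer flush if the
 * allocation fails).
 */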
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	/* Raise the minimum size to at least the requested size,
	   aligned up to the next 16-byte boundary. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	if (RADEON_DEBUG & (DEBUG_IOCTL | DEBUG_DMA))
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (rmesa->dma.flush) {
		rmesa->dma.flush(rmesa->glCtx);
	}

	/* unmap the old reserved bo */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		struct radeon_dma_bo *dma_bo = CALLOC(sizeof(struct radeon_dma_bo));
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers at the end of the list, so we can
		   keep a count of unused buffers at the beginning of the
		   list and later free them from there. */
		struct radeon_dma_bo *dma_bo = last_elem(&rmesa->dma.free);
		assert(dma_bo->bo->cref == 1);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed during the space check
		   above, which released the reserved buffer, so allocate
		   a new one. */
		goto again_alloc;
	}

	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate a region from the current reserved DMA buffer.  If there isn't
 * enough space left, grab a new buffer (and discard what was left of the
 * current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

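/* Free every DMA buffer on the free, wait and reserved lists.  Reserved
 * buffers are still mapped, so they are unmapped before the final unref.
 */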
void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & DEBUG_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unmap(dma_bo->bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

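/* Give back 'return_bytes' unused bytes from the end of the current
 * reserved region.
 */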
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

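/* Returns nonzero once the GPU is no longer using 'bo'.  If the kernel or
 * libdrm lacks the busy query (-EINVAL), warn once and treat the buffer
 * as idle rather than stalling.
 */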
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

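/* Age the DMA buffer lists after a flush: idle buffers on the wait list
 * move to the free list, reserved buffers move to the wait list, and free
 * buffers that have not been reused within DMA_BO_FREE_TIME aging steps
 * are released.  Buffers smaller than the current minimum size are freed
 * immediately since they can no longer satisfy allocations.
 */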
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & DEBUG_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %u, wait %u, reserved %u, minimum_size: %u\n",
			__FUNCTION__, (unsigned)free, (unsigned)wait,
			(unsigned)reserved, (unsigned)rmesa->dma.minimum_size);
	}

	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
		/* request updated cs processing information from kernel */
		legacy_track_pending(rmesa->radeonScreen->bom, 0);
	}
	/* Move idle buffers from the wait list to the free list; the wait
	   list gives the GPU time to finish with the data before the
	   buffer is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are now too small to be used because a
		   larger request raised the minimum size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo))
			continue;
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* unmap the last dma region */
	if (!is_empty_list(&rmesa->dma.reserved))
		radeon_bo_unmap(first_elem(&rmesa->dma.reserved)->bo);
	/* move reserved buffers to the wait list */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		/* Free objects that are now too small to be used because a
		   larger request raised the minimum size. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* free buffers that have been unused for some time */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( GLcontext *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	if (!is_empty_list(&dma->reserved)) {
		GLuint current_offset = dma->current_used;

		assert(dma->current_used +
		       rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		       dma->current_vertexptr);

		if (dma->current_used != dma->current_vertexptr) {
			dma->current_used = dma->current_vertexptr;

			rmesa->vtbl.swtcl_flush(ctx, current_offset);
		}
		rmesa->swtcl.numverts = 0;
	}
}

/* Allocate space for 'nverts' vertices of 'vsize' bytes each in the
 * current dma region.
 */
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
restart:
	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	if (is_empty_list(&rmesa->dma.reserved)
	    || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		radeonRefillCurrentDmaRegion(rmesa, bytes);
	}

	if (!rmesa->dma.flush) {
		/* make sure we have enough space to use this in the cmdbuf */
		rcommonEnsureCmdBufSpace(rmesa,
					 rmesa->hw.max_state_size + (20*sizeof(int)),
					 __FUNCTION__);
		/* if the cmdbuf was flushed, the DMA region was released,
		   so restart */
		if (is_empty_list(&rmesa->dma.reserved))
			goto restart;
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	head = (first_elem(&rmesa->dma.reserved)->bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

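/* Drop the references this context holds on its vertex array-of-structs
 * buffer objects, flushing any pending vertices first.  'newinputs' is
 * accepted for the driver interface but unused here: all aos slots are
 * released unconditionally.
 */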
void radeonReleaseArrays( GLcontext *ctx, GLuint newinputs )
{
	radeonContextPtr radeon = RADEON_CONTEXT( ctx );
	int i;

	if (RADEON_DEBUG & DEBUG_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (radeon->dma.flush) {
		radeon->dma.flush(radeon->glCtx);
	}
	for (i = 0; i < radeon->tcl.aos_count; i++) {
		if (radeon->tcl.aos[i].bo) {
			radeon_bo_unref(radeon->tcl.aos[i].bo);
			radeon->tcl.aos[i].bo = NULL;
		}
	}
}
480