/**************************************************************************

Copyright (C) 2004 Nicolai Haehnle.
Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
on the rights to use, copy, modify, merge, publish, distribute, sub
license, and/or sell copies of the Software, and to permit persons to whom
the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice (including the next
paragraph) shall be included in all copies or substantial portions of the
Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

#include <errno.h>
#include "radeon_common.h"
#include "radeon_fog.h"
#include "main/simple_list.h"

#if defined(USE_X86_ASM)
#define COPY_DWORDS( dst, src, nr )					\
do {									\
	int __tmp;							\
	__asm__ __volatile__( "rep ; movsl"				\
			      : "=%c" (__tmp), "=D" (dst), "=S" (__tmp)	\
			      : "0" (nr),				\
			        "D" ((long)dst),			\
			        "S" ((long)src) );			\
} while (0)
#else
#define COPY_DWORDS( dst, src, nr )		\
do {						\
   int j;					\
   for ( j = 0 ; j < nr ; j++ )			\
      dst[j] = ((int *)src)[j];			\
   dst += nr;					\
} while (0)
#endif

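/* radeonEmitVec4/8/12/16 copy count elements of 4/8/12/16 bytes each from
 * a (possibly strided) vertex array into the DMA buffer pointed to by out.
 * When the stride matches the element size the data is packed, so it can
 * be copied in a single run of dwords via COPY_DWORDS.
 */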
void radeonEmitVec4(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 4)
		COPY_DWORDS(out, data, count);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out++;
			data += stride;
		}
}

void radeonEmitVec8(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 8)
		COPY_DWORDS(out, data, count * 2);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out += 2;
			data += stride;
		}
}

void radeonEmitVec12(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 12)
		COPY_DWORDS(out, data, count * 3);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out += 3;
			data += stride;
		}
}

void radeonEmitVec16(uint32_t *out, const GLvoid * data, int stride, int count)
{
	int i;

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d out %p data %p\n",
			__FUNCTION__, count, stride, (void *)out, (void *)data);

	if (stride == 16)
		COPY_DWORDS(out, data, count * 4);
	else
		for (i = 0; i < count; i++) {
			out[0] = *(int *)data;
			out[1] = *(int *)(data + 4);
			out[2] = *(int *)(data + 8);
			out[3] = *(int *)(data + 12);
			out += 4;
			data += stride;
		}
}

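/* Upload a vertex array into a freshly allocated DMA region and describe
 * it in *aos.  size is the number of dwords per element; a stride of 0
 * means a single constant element is emitted.
 */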
void rcommon_emit_vector(struct gl_context * ctx, struct radeon_aos *aos,
			 const GLvoid * data, int size, int stride, int count)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	uint32_t *out;

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	radeon_bo_map(aos->bo, 1);
	out = (uint32_t*)((char*)aos->bo->ptr + aos->offset);
	switch (size) {
	case 1: radeonEmitVec4(out, data, stride, count); break;
	case 2: radeonEmitVec8(out, data, stride, count); break;
	case 3: radeonEmitVec12(out, data, stride, count); break;
	case 4: radeonEmitVec16(out, data, stride, count); break;
	default:
		assert(0);
		break;
	}
	radeon_bo_unmap(aos->bo);
}

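/* Like rcommon_emit_vector, but converts each input fog coordinate to a
 * single float blend factor via radeonComputeFogBlendFactor while copying.
 */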
void rcommon_emit_vecfog(struct gl_context *ctx, struct radeon_aos *aos,
			 GLvoid *data, int stride, int count)
{
	int i;
	float *out;
	int size = 1;
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);

	if (RADEON_DEBUG & RADEON_VERTS)
		fprintf(stderr, "%s count %d stride %d\n",
			__FUNCTION__, count, stride);

	if (stride == 0) {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * 4, 32);
		count = 1;
		aos->stride = 0;
	} else {
		radeonAllocDmaRegion(rmesa, &aos->bo, &aos->offset, size * count * 4, 32);
		aos->stride = size;
	}

	aos->components = size;
	aos->count = count;

	/* Emit the data */
	radeon_bo_map(aos->bo, 1);
	out = (float*)((char*)aos->bo->ptr + aos->offset);
	for (i = 0; i < count; i++) {
		out[0] = radeonComputeFogBlendFactor( ctx, *(GLfloat *)data );
		out++;
		data += stride;
	}
	radeon_bo_unmap(aos->bo);
}

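/* Initialize the DMA buffer lists (free, wait, reserved) and the default
 * minimum buffer allocation size.
 */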
void radeon_init_dma(radeonContextPtr rmesa)
{
	make_empty_list(&rmesa->dma.free);
	make_empty_list(&rmesa->dma.wait);
	make_empty_list(&rmesa->dma.reserved);
	rmesa->dma.minimum_size = MAX_DMA_BUF_SZ;
}

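/* Make a buffer of at least size bytes the current reservation: reuse a
 * large enough buffer from the free list if possible, otherwise open a
 * new BO, flushing the command buffer and retrying if allocation fails.
 */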
void radeonRefillCurrentDmaRegion(radeonContextPtr rmesa, int size)
{
	struct radeon_dma_bo *dma_bo = NULL;
	/* Raise the minimum size to at least the requested size,
	   aligned up to the next 16 bytes. */
	if (size > rmesa->dma.minimum_size)
		rmesa->dma.minimum_size = (size + 15) & (~15);

	radeon_print(RADEON_DMA, RADEON_NORMAL, "%s size %d minimum_size %zu\n",
			__FUNCTION__, size, rmesa->dma.minimum_size);

	if (is_empty_list(&rmesa->dma.free)
	      || last_elem(&rmesa->dma.free)->bo->size < size) {
		dma_bo = CALLOC_STRUCT(radeon_dma_bo);
		assert(dma_bo);

again_alloc:
		dma_bo->bo = radeon_bo_open(rmesa->radeonScreen->bom,
					    0, rmesa->dma.minimum_size, 4,
					    RADEON_GEM_DOMAIN_GTT, 0);

		if (!dma_bo->bo) {
			rcommonFlushCmdBuf(rmesa, __FUNCTION__);
			goto again_alloc;
		}
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	} else {
		/* We push and pop buffers at the end of the list, so we can
		   keep a count of unused buffers and later free them from
		   the beginning of the list. */
		dma_bo = last_elem(&rmesa->dma.free);
		remove_from_list(dma_bo);
		insert_at_head(&rmesa->dma.reserved, dma_bo);
	}

	rmesa->dma.current_used = 0;
	rmesa->dma.current_vertexptr = 0;

	if (radeon_cs_space_check_with_bo(rmesa->cmdbuf.cs,
					  first_elem(&rmesa->dma.reserved)->bo,
					  RADEON_GEM_DOMAIN_GTT, 0))
		fprintf(stderr, "failure to revalidate BOs - badness\n");

	if (is_empty_list(&rmesa->dma.reserved)) {
		/* The command buffer was flushed in radeon_revalidate_bos. */
		goto again_alloc;
	}
	radeon_bo_map(first_elem(&rmesa->dma.reserved)->bo, 1);
}

/* Allocate a region from the current DMA buffer (the head of
 * rmesa->dma.reserved).  If there isn't enough space left, grab a new
 * buffer (and discard what was left of the current one).
 */
void radeonAllocDmaRegion(radeonContextPtr rmesa,
			  struct radeon_bo **pbo, int *poffset,
			  int bytes, int alignment)
{
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, bytes);

	if (rmesa->dma.flush)
		rmesa->dma.flush(rmesa->glCtx);

	assert(rmesa->dma.current_used == rmesa->dma.current_vertexptr);

	alignment--;
	rmesa->dma.current_used = (rmesa->dma.current_used + alignment) & ~alignment;

	if (is_empty_list(&rmesa->dma.reserved)
		|| rmesa->dma.current_used + bytes > first_elem(&rmesa->dma.reserved)->bo->size)
		radeonRefillCurrentDmaRegion(rmesa, bytes);

	*poffset = rmesa->dma.current_used;
	*pbo = first_elem(&rmesa->dma.reserved)->bo;
	radeon_bo_ref(*pbo);

	/* Always align to at least 16 bytes */
	rmesa->dma.current_used = (rmesa->dma.current_used + bytes + 15) & ~15;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;

	assert(rmesa->dma.current_used <= first_elem(&rmesa->dma.reserved)->bo->size);
}

void radeonFreeDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	if (RADEON_DEBUG & RADEON_DMA)
		fprintf(stderr, "%s\n", __FUNCTION__);

	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}

	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}

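/* Give back the last return_bytes of the current reservation, e.g. when
 * fewer vertices were emitted than space was allocated for.
 */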
void radeonReturnDmaRegion(radeonContextPtr rmesa, int return_bytes)
{
	if (is_empty_list(&rmesa->dma.reserved))
		return;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s %d\n", __FUNCTION__, return_bytes);
	rmesa->dma.current_used -= return_bytes;
	rmesa->dma.current_vertexptr = rmesa->dma.current_used;
}

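/* Returns nonzero when the GPU is no longer using the BO.  If the busy
 * query is unsupported (-EINVAL), the BO is treated as idle after a
 * one-time warning.
 */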
static int radeon_bo_is_idle(struct radeon_bo* bo)
{
	uint32_t domain;
	int ret = radeon_bo_is_busy(bo, &domain);
	if (ret == -EINVAL) {
		WARN_ONCE("Your libdrm or kernel doesn't have support for busy query.\n"
			"This may cause a small performance drop.\n");
	}
	return ret != -EBUSY;
}

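/* Age the DMA buffer lists: reserved buffers move to the wait list, idle
 * buffers on the wait list move to the free list, and free buffers that
 * have not been reused for DMA_BO_FREE_TIME passes are released.  The
 * wait list gives the GPU time to finish with a buffer before it is
 * handed out again.
 */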
void radeonReleaseDmaRegions(radeonContextPtr rmesa)
{
	struct radeon_dma_bo *dma_bo;
	struct radeon_dma_bo *temp;
	const int expire_at = ++rmesa->dma.free.expire_counter + DMA_BO_FREE_TIME;
	const int time = rmesa->dma.free.expire_counter;

	if (RADEON_DEBUG & RADEON_DMA) {
		size_t free = 0,
		       wait = 0,
		       reserved = 0;
		foreach(dma_bo, &rmesa->dma.free)
			++free;

		foreach(dma_bo, &rmesa->dma.wait)
			++wait;

		foreach(dma_bo, &rmesa->dma.reserved)
			++reserved;

		fprintf(stderr, "%s: free %zu, wait %zu, reserved %zu, minimum_size: %zu\n",
		      __FUNCTION__, free, wait, reserved, rmesa->dma.minimum_size);
	}

	/* Move waiting BOs to the free list; the wait list gives the GPU
	   time to finish with the data before the buffer is reused. */
	foreach_s(dma_bo, temp, &rmesa->dma.wait) {
		if (dma_bo->expire_counter == time) {
			WARN_ONCE("Leaking dma buffer object!\n");
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		/* Free objects that are too small to satisfy a larger request. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		if (!radeon_bo_is_idle(dma_bo->bo)) {
			break;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.free, dma_bo);
	}

	/* Move reserved buffers to the wait list. */
	foreach_s(dma_bo, temp, &rmesa->dma.reserved) {
		radeon_bo_unmap(dma_bo->bo);
		/* Free objects that are too small to satisfy a larger request. */
		if (dma_bo->bo->size < rmesa->dma.minimum_size) {
			radeon_bo_unref(dma_bo->bo);
			remove_from_list(dma_bo);
			FREE(dma_bo);
			continue;
		}
		remove_from_list(dma_bo);
		dma_bo->expire_counter = expire_at;
		insert_at_tail(&rmesa->dma.wait, dma_bo);
	}

	/* Free BOs that have been unused for some time. */
	foreach_s(dma_bo, temp, &rmesa->dma.free) {
		if (dma_bo->expire_counter != time)
			break;
		remove_from_list(dma_bo);
		radeon_bo_unref(dma_bo->bo);
		FREE(dma_bo);
	}
}


/* Flush vertices in the current dma region.
 */
void rcommon_flush_last_swtcl_prim( struct gl_context *ctx )
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	struct radeon_dma *dma = &rmesa->dma;

	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);
	dma->flush = NULL;

	radeon_bo_unmap(rmesa->swtcl.bo);

	if (!is_empty_list(&dma->reserved)) {
	    GLuint current_offset = dma->current_used;

	    assert(dma->current_used +
		   rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		   dma->current_vertexptr);

	    if (dma->current_used != dma->current_vertexptr) {
		    dma->current_used = dma->current_vertexptr;

		    rmesa->vtbl.swtcl_flush(ctx, current_offset);
	    }
	    rmesa->swtcl.numverts = 0;
	}
	radeon_bo_unref(rmesa->swtcl.bo);
	rmesa->swtcl.bo = NULL;
}
/* Alloc space in the current dma region.
 */
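/* Note: when the current buffer cannot hold nverts more vertices, any
 * pending primitive is flushed, a fresh region is reserved and NULL is
 * returned, so the caller must retry the allocation.
 */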
void *
rcommonAllocDmaLowVerts( radeonContextPtr rmesa, int nverts, int vsize )
{
	GLuint bytes = vsize * nverts;
	void *head;
	if (RADEON_DEBUG & RADEON_IOCTL)
		fprintf(stderr, "%s\n", __FUNCTION__);

	if (is_empty_list(&rmesa->dma.reserved)
	      || rmesa->dma.current_vertexptr + bytes > first_elem(&rmesa->dma.reserved)->bo->size) {
		if (rmesa->dma.flush) {
			rmesa->dma.flush(rmesa->glCtx);
		}

		radeonRefillCurrentDmaRegion(rmesa, bytes);

		return NULL;
	}

	if (!rmesa->dma.flush) {
		/* Restart DMA tracking if the command buffer was flushed. */
		rmesa->glCtx->Driver.NeedFlush |= FLUSH_STORED_VERTICES;
		rmesa->dma.flush = rcommon_flush_last_swtcl_prim;
	}

	ASSERT( vsize == rmesa->swtcl.vertex_size * 4 );
	ASSERT( rmesa->dma.flush == rcommon_flush_last_swtcl_prim );
	ASSERT( rmesa->dma.current_used +
		rmesa->swtcl.numverts * rmesa->swtcl.vertex_size * 4 ==
		rmesa->dma.current_vertexptr );

	if (!rmesa->swtcl.bo) {
		rmesa->swtcl.bo = first_elem(&rmesa->dma.reserved)->bo;
		radeon_bo_ref(rmesa->swtcl.bo);
		radeon_bo_map(rmesa->swtcl.bo, 1);
	}

	head = (rmesa->swtcl.bo->ptr + rmesa->dma.current_vertexptr);
	rmesa->dma.current_vertexptr += bytes;
	rmesa->swtcl.numverts += nverts;
	return head;
}

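/* Drop the array-of-structures BO references once their data has been
 * emitted, flushing any pending software TCL primitive first.
 */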
void radeonReleaseArrays( struct gl_context *ctx, GLuint newinputs )
{
   radeonContextPtr radeon = RADEON_CONTEXT( ctx );
   int i;
   if (RADEON_DEBUG & RADEON_IOCTL)
      fprintf(stderr, "%s\n", __FUNCTION__);

   if (radeon->dma.flush) {
      radeon->dma.flush(radeon->glCtx);
   }
   for (i = 0; i < radeon->tcl.aos_count; i++) {
      if (radeon->tcl.aos[i].bo) {
         radeon_bo_unref(radeon->tcl.aos[i].bo);
         radeon->tcl.aos[i].bo = NULL;
      }
   }
}