radeon_span.c revision f577c8e462fc924ea436d129ad64c8a1226b5f9c
/**************************************************************************

Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "swrast/swrast.h"

#include "radeon_common.h"
#include "radeon_lock.h"
#include "radeon_span.h"

#define DBG 0

static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);

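/* Return a pointer to the pixel at (x, y) in a renderbuffer that stores
 * 32 bits per pixel, handling the simple linear layout as well as the
 * macro-/micro-tiled layouts used when no hardware surface covers the BO.
 */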
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                nmacroblkpl = rrb->pitch >> 5;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x >> 5) << 11;
                offset += ((x & 31) >> 2) << 5;
                offset += (x & 3) << 2;
            } else {
                nmacroblkpl = rrb->pitch >> 6;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x >> 6) << 11;
                offset += ((x & 63) >> 3) << 5;
                offset += (x & 7) << 2;
            }
        } else {
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x >> 3) << 5;
            offset += (x & 7) << 2;
        }
    }
    return &ptr[offset];
}

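/* Same as radeon_ptr32(), but for renderbuffers that store 16 bits per
 * pixel (used by the RGB565 span functions below).
 */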
static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                nmacroblkpl = rrb->pitch >> 6;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x >> 6) << 11;
                offset += ((x & 63) >> 3) << 5;
                offset += (x & 7) << 1;
            } else {
                nmacroblkpl = rrb->pitch >> 7;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x >> 7) << 11;
                offset += ((x & 127) >> 4) << 5;
                offset += (x & 15) << 2;
            }
        } else {
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x >> 4) << 5;
            offset += (x & 15) << 2;
        }
    }
    return &ptr[offset];
}

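/* Generic variant of the helpers above: derives the micro- and macro-tile
 * block widths from rrb->cpp instead of hard-coding a pixel size.
 */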
static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
			   GLint x, GLint y)
{
    GLubyte *ptr = rrb->bo->ptr;
    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
    GLint offset;
    GLint microblkxs;
    GLint macroblkxs;
    GLint nmacroblkpl;
    GLint nmicroblkpl;

    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
        offset = x * rrb->cpp + y * rrb->pitch;
    } else {
        offset = 0;
        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
                microblkxs = 16 / rrb->cpp;
                macroblkxs = 128 / rrb->cpp;
                nmacroblkpl = rrb->pitch / macroblkxs;
                offset += ((y >> 4) * nmacroblkpl) << 11;
                offset += ((y & 15) >> 1) << 8;
                offset += (y & 1) << 4;
                offset += (x / macroblkxs) << 11;
                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                offset += (x & (microblkxs - 1)) * rrb->cpp;
            } else {
                microblkxs = 32 / rrb->cpp;
                macroblkxs = 256 / rrb->cpp;
                nmacroblkpl = rrb->pitch / macroblkxs;
                offset += ((y >> 3) * nmacroblkpl) << 11;
                offset += (y & 7) << 8;
                offset += (x / macroblkxs) << 11;
                offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
                offset += (x & (microblkxs - 1)) * rrb->cpp;
            }
        } else {
            microblkxs = 32 / rrb->cpp;
            nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
            offset += (y * nmicroblkpl) << 5;
            offset += (x / microblkxs) << 5;
            offset += (x & (microblkxs - 1)) * rrb->cpp;
        }
    }
    return &ptr[offset];
}

/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
#define LOCAL_VARS						\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;		\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
   GLuint p;						\
   (void)p;						\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_DEPTH_VARS				\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;	\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

#define Y_FLIP(_y) ((_y) * yScale + yBias)

#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 */
#define HW_CLIPLOOP()							\
   do {									\
      int _nc = num_cliprects;						\
      while ( _nc-- ) {							\
	 int minx = cliprects[_nc].x1 - x_off;				\
	 int miny = cliprects[_nc].y1 - y_off;				\
	 int maxx = cliprects[_nc].x2 - x_off;				\
	 int maxy = cliprects[_nc].y2 - y_off;

/* ================================================================
 * Color buffer
 */

/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* 32 bit, xRGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_xRGB8888
#define TAG2(x,y) radeon##x##_xRGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine.  In each case, the linear block address (ba)
 * is calculated, and then wired with x and y to produce the final
 * memory address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly. It is not quite enough to get it working with hyperz
 * too...
 */

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

#define WRITE_DEPTH( _x, _y, d )					\
   *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d

#define READ_DEPTH( d, _x, _y )						\
   d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   tmp &= 0x000000ff;							\
   tmp |= ((d << 8) & 0xffffff00);					\
   *_ptr = tmp;								\
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   tmp &= 0xff000000;							\
   tmp |= ((d) & 0x00ffffff);						\
   *_ptr = tmp;								\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y )						\
do {									\
   d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
} while (0)
#else
#define READ_DEPTH( d, _x, _y )						\
   d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off,	_y + y_off )) & 0x00ffffff;
#endif
/*
    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
   d = *(GLuint*)(radeon_ptr(rrb, _x,	_y )) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"

/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);	\
   GLuint tmp = *_ptr;							\
   tmp &= 0xffffff00;							\
   tmp |= (d) & 0xff;							\
   *_ptr = tmp;								\
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off);	\
   GLuint tmp = *_ptr;							\
   tmp &= 0x00ffffff;							\
   tmp |= (((d) & 0xff) << 24);						\
   *_ptr = tmp;								\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   d = tmp & 0x000000ff;						\
} while (0)
#else
#define READ_STENCIL( d, _x, _y )					\
do {									\
   GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off );	\
   GLuint tmp = *_ptr;							\
   d = (tmp & 0xff000000) >> 24;					\
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"

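/* Map (flag != 0) or unmap (flag == 0) the buffer object behind a
 * renderbuffer.  Mapping also (re)installs the span functions; unmapping
 * clears the GetRow/PutRow hooks so swrast cannot touch an unmapped BO.
 */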
static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
{
	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
	int r;

	if (rrb == NULL || !rrb->bo)
		return;

	if (flag) {
		r = radeon_bo_map(rrb->bo, 1);
		if (r) {
			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
				__FUNCTION__, r);
		}

		radeonSetSpanFunctions(rrb);
	} else {
		radeon_bo_unmap(rrb->bo);
		rb->GetRow = NULL;
		rb->PutRow = NULL;
	}
}

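/* Map or unmap everything software rendering may touch: the color draw
 * buffers, any render-to-texture attachments, the color read buffer and
 * the (wrapped) depth and stencil buffers.
 */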
static void
radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
{
	GLuint i, j;

	/* color draw buffers */
	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);

	/* check for render to textures */
	for (i = 0; i < BUFFER_COUNT; i++) {
		struct gl_renderbuffer_attachment *att =
			ctx->DrawBuffer->Attachment + i;
		struct gl_texture_object *tex = att->Texture;
		if (tex) {
			/* render to texture */
			ASSERT(att->Renderbuffer);
			if (map)
				ctx->Driver.MapTexture(ctx, tex);
			else
				ctx->Driver.UnmapTexture(ctx, tex);
		}
	}

	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);

	/* depth buffer (Note wrapper!) */
	if (ctx->DrawBuffer->_DepthBuffer)
		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);

	if (ctx->DrawBuffer->_StencilBuffer)
		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
}
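
/* Called by swrast before it starts poking at pixels: flush any pending
 * vertices, map the currently enabled textures and all renderbuffers,
 * then grab the hardware lock and wait for the chip to go idle.
 */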
static void radeonSpanRenderStart(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int i;

	radeon_firevertices(rmesa);

	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
		if (ctx->Texture.Unit[i]._ReallyEnabled)
			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
	}

	radeon_map_unmap_buffers(ctx, 1);

	/* The locking and wait for idle should really only be needed in classic mode.
	 * In a future memory manager based implementation, this should become
	 * unnecessary due to the fact that mapping our buffers, textures, etc.
	 * should implicitly wait for any previous rendering commands that must
	 * be waited on. */
	LOCK_HARDWARE(rmesa);
	radeonWaitForIdleLocked(rmesa);
}

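/* Counterpart of radeonSpanRenderStart(): flush swrast, release the
 * hardware lock and unmap the textures and renderbuffers again.
 */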
static void radeonSpanRenderFinish(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int i;

	_swrast_flush(ctx);
	UNLOCK_HARDWARE(rmesa);

	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
		if (ctx->Texture.Unit[i]._ReallyEnabled)
			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
	}

	radeon_map_unmap_buffers(ctx, 0);
}

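/* Hook the span render start/finish callbacks into swrast for this
 * context.  The per-renderbuffer Get/Put functions are installed later
 * by radeonSetSpanFunctions() when the buffers are mapped.
 */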
void radeonInitSpanFuncs(GLcontext * ctx)
{
	struct swrast_device_driver *swdd =
	    _swrast_GetDeviceDriverReference(ctx);
	swdd->SpanRenderStart = radeonSpanRenderStart;
	swdd->SpanRenderFinish = radeonSpanRenderFinish;
}

/**
 * Plug in the Get/Put span routines for the given renderbuffer,
 * based on its actual format.
 */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
	if (rrb->base._ActualFormat == GL_RGB5) {
		radeonInitPointers_RGB565(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_RGB8) {
		radeonInitPointers_xRGB8888(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_RGBA8) {
		radeonInitPointers_ARGB8888(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
		radeonInitDepthPointers_z16(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
		radeonInitDepthPointers_z24_s8(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
		radeonInitStencilPointers_z24_s8(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
		radeonInitStencilPointers_z24_s8(&rrb->base);
	}
}