radeon_span.c revision cea0c2b14426bf315ae606656274dae5eedd1b6a
1/**************************************************************************
2
3Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     VA Linux Systems Inc., Fremont, California.
6
7The Weather Channel (TM) funded Tungsten Graphics to develop the
8initial release of the Radeon 8500 driver under the XFree86 license.
9This notice must be preserved.
10
11All Rights Reserved.
12
13Permission is hereby granted, free of charge, to any person obtaining
14a copy of this software and associated documentation files (the
15"Software"), to deal in the Software without restriction, including
16without limitation the rights to use, copy, modify, merge, publish,
17distribute, sublicense, and/or sell copies of the Software, and to
18permit persons to whom the Software is furnished to do so, subject to
19the following conditions:
20
21The above copyright notice and this permission notice (including the
22next paragraph) shall be included in all copies or substantial
23portions of the Software.
24
25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33**************************************************************************/
34
35/*
36 * Authors:
37 *   Kevin E. Martin <martin@valinux.com>
38 *   Gareth Hughes <gareth@valinux.com>
39 *   Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43#include "glheader.h"
44#include "swrast/swrast.h"
45
46#include "radeon_context.h"
47#include "radeon_ioctl.h"
48#include "radeon_state.h"
49#include "radeon_span.h"
50#include "radeon_tex.h"
51
52#include "drirenderbuffer.h"
53
54#define DBG 0
55
/*
 * Local state expected by the span/depth/stencil templates included below.
 *
 * Everything needed to reach a renderbuffer's pixels is taken from the
 * gl_renderbuffer parameter (rb), never from per-context information.
 */

/* Declarations common to the color and depth/stencil variants:
 * the driRenderbuffer view of rb, its drawable, and the last row index
 * (drawable height minus one) used by Y_FLIP.
 */
#define RADEON_SPAN_COMMON_VARS					\
   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
   const GLuint bottom = dPriv->h - 1;

/* Color buffers: buf is flippedData advanced by the drawable's (x, y)
 * position, using flippedPitch pixels per row and cpp bytes per pixel.
 * p is declared for the template and voided here, presumably so that
 * template functions which never use it do not trigger a warning.
 */
#define LOCAL_VARS						\
   RADEON_SPAN_COMMON_VARS					\
   GLubyte *buf = (GLubyte *) drb->flippedData			\
      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
   GLuint p;							\
   (void) p;

/* Depth buffers: buf is the unadjusted buffer base; the drawable origin
 * is kept separately in xo/yo so it can be added to each coordinate
 * before the address is computed.
 */
#define LOCAL_DEPTH_VARS				\
   RADEON_SPAN_COMMON_VARS				\
   GLuint xo = dPriv->x;				\
   GLuint yo = dPriv->y;				\
   GLubyte *buf = (GLubyte *) drb->Base.Data;

/* Stencil access uses exactly the same locals as depth access. */
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Mirror a row index around the drawable's vertical extent. */
#define Y_FLIP(Y) (bottom - (Y))

/* Per-span locking is intentionally empty in this file. */
#define HW_LOCK()

#define HW_UNLOCK()
85
86/* ================================================================
87 * Color buffer
88 */
89
90/* 16 bit, RGB565 color spanline and pixel functions
91 */
92#define SPANTMP_PIXEL_FMT GL_RGB
93#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
94
95#define TAG(x)    radeon##x##_RGB565
96#define TAG2(x,y) radeon##x##_RGB565##y
97#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
98#include "spantmp2.h"
99
100/* 32 bit, ARGB8888 color spanline and pixel functions
101 */
102#define SPANTMP_PIXEL_FMT GL_BGRA
103#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
104
105#define TAG(x)    radeon##x##_ARGB8888
106#define TAG2(x,y) radeon##x##_ARGB8888##y
107#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
108#include "spantmp2.h"
109
110/* ================================================================
111 * Depth buffer
112 */
113
114/* The Radeon family has depth tiling on all the time, so we have to convert
115 * the x,y coordinates into the memory bus address (mba) in the same
116 * manner as the engine.  In each case, the linear block address (ba)
117 * is calculated, and then wired with x and y to produce the final
118 * memory address.
119 * The chip will do address translation on its own if the surface registers
120 * are set up correctly. It is not quite enough to get it working with hyperz
121 * too...
122 */
123
124static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
125{
126	GLuint pitch = drb->pitch;
127	if (drb->depthHasSurface) {
128		return 4 * (x + y * pitch);
129	} else {
130		GLuint ba, address = 0;	/* a[0..1] = 0           */
131
132		ba = (y / 16) * (pitch / 16) + (x / 16);
133
134		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
135		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
136		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
137		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
138
139		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
140		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
141		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
142
143		return address;
144	}
145}
146
147static INLINE GLuint
148radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
149{
150	GLuint pitch = drb->pitch;
151	if (drb->depthHasSurface) {
152		return 2 * (x + y * pitch);
153	} else {
154		GLuint ba, address = 0;	/* a[0]    = 0           */
155
156		ba = (y / 16) * (pitch / 32) + (x / 32);
157
158		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
159		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
160		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
161		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
162		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
163		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
164		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
165
166		return address;
167	}
168}
169
170/* 16-bit depth buffer functions
171 */
172#define WRITE_DEPTH( _x, _y, d )					\
173   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
174
175#define READ_DEPTH( d, _x, _y )						\
176   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
177
178#define TAG(x) radeon##x##_z16
179#include "depthtmp.h"
180
181/* 24 bit depth, 8 bit stencil depthbuffer functions
182 */
183#define WRITE_DEPTH( _x, _y, d )					\
184do {									\
185   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
186   GLuint tmp = *(GLuint *)(buf + offset);				\
187   tmp &= 0xff000000;							\
188   tmp |= ((d) & 0x00ffffff);						\
189   *(GLuint *)(buf + offset) = tmp;					\
190} while (0)
191
192#define READ_DEPTH( d, _x, _y )						\
193   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
194					 _y + yo )) & 0x00ffffff;
195
196#define TAG(x) radeon##x##_z24_s8
197#include "depthtmp.h"
198
199/* ================================================================
200 * Stencil buffer
201 */
202
203/* 24 bit depth, 8 bit stencil depthbuffer functions
204 */
205#define WRITE_STENCIL( _x, _y, d )					\
206do {									\
207   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
208   GLuint tmp = *(GLuint *)(buf + offset);				\
209   tmp &= 0x00ffffff;							\
210   tmp |= (((d) & 0xff) << 24);						\
211   *(GLuint *)(buf + offset) = tmp;					\
212} while (0)
213
214#define READ_STENCIL( d, _x, _y )					\
215do {									\
216   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
217   GLuint tmp = *(GLuint *)(buf + offset);				\
218   tmp &= 0xff000000;							\
219   d = tmp >> 24;							\
220} while (0)
221
222#define TAG(x) radeon##x##_z24_s8
223#include "stenciltmp.h"
224
225/* Move locking out to get reasonable span performance (10x better
226 * than doing this in HW_LOCK above).  WaitForIdle() is the main
227 * culprit.
228 */
229
230static void radeonSpanRenderStart(GLcontext * ctx)
231{
232	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
233	RADEON_FIREVERTICES(rmesa);
234	LOCK_HARDWARE(rmesa);
235	radeonWaitForIdleLocked(rmesa);
236}
237
238static void radeonSpanRenderFinish(GLcontext * ctx)
239{
240	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
241	_swrast_flush(ctx);
242	UNLOCK_HARDWARE(rmesa);
243}
244
245void radeonInitSpanFuncs(GLcontext * ctx)
246{
247	struct swrast_device_driver *swdd =
248	    _swrast_GetDeviceDriverReference(ctx);
249	swdd->SpanRenderStart = radeonSpanRenderStart;
250	swdd->SpanRenderFinish = radeonSpanRenderFinish;
251}
252
253/**
254 * Plug in the Get/Put routines for the given driRenderbuffer.
255 */
256void radeonSetSpanFunctions(driRenderbuffer * drb, const GLvisual * vis)
257{
258	if (drb->Base.InternalFormat == GL_RGBA) {
259		if (vis->redBits == 5 && vis->greenBits == 6
260		    && vis->blueBits == 5) {
261			radeonInitPointers_RGB565(&drb->Base);
262		} else {
263			radeonInitPointers_ARGB8888(&drb->Base);
264		}
265	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
266		radeonInitDepthPointers_z16(&drb->Base);
267	} else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
268		radeonInitDepthPointers_z24_s8(&drb->Base);
269	} else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
270		radeonInitStencilPointers_z24_s8(&drb->Base);
271	}
272}
273