/* radeon_span.c revision ed3a1cce73fcd0d6f4b6e9b5f69a98ad179ddc4b */
/**************************************************************************

Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "swrast/swrast.h"

#include "radeon_context.h"
#include "radeon_ioctl.h"
#include "radeon_state.h"
#include "radeon_span.h"
#include "radeon_tex.h"

#include "drirenderbuffer.h"

/* Debug switch, off by default.  Nothing in the visible part of this file
 * reads it; presumably the included span templates may -- TODO confirm.
 */
#define DBG 0

/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 *
 * LOCAL_VARS supplies the locals for the color span code pulled in from
 * spantmp2.h below.  It must be expanded where a variable named `rb` is in
 * scope (the template itself is not visible from this file).  It declares:
 *   drb    - `rb` cast to driRenderbuffer
 *   dPriv  - the drawable private record referenced by drb
 *   bottom - dPriv->h - 1, consumed by Y_FLIP()
 *   buf    - drb->flippedData advanced to the drawable's (x, y) origin,
 *            scaled by drb->cpp
 *   p      - scratch value; the (void) cast keeps it from triggering an
 *            unused-variable warning when a template does not need it
 */
#define LOCAL_VARS						\
   driRenderbuffer *drb = (driRenderbuffer *) rb;		\
   const __DRIdrawablePrivate *dPriv = drb->dPriv;		\
   const GLuint bottom = dPriv->h - 1;				\
   GLubyte *buf = (GLubyte *) drb->flippedData			\
      + (dPriv->y * drb->flippedPitch + dPriv->x) * drb->cpp;	\
   GLuint p;							\
   (void) p;

/*
 * Locals for the depth/stencil span code.  Unlike LOCAL_VARS, `buf` is the
 * unadjusted drb->Base.Data pointer; the drawable origin is kept separately
 * in xo/yo and added to the pixel coordinates by the READ/WRITE macros
 * before they are handed to radeon_mba_z16()/radeon_mba_z32().
 * Requires a variable named `rb` in scope.
 */
#define LOCAL_DEPTH_VARS				\
   driRenderbuffer *drb = (driRenderbuffer *) rb;	\
   const __DRIdrawablePrivate *dPriv = drb->dPriv;	\
   const GLuint bottom = dPriv->h - 1;			\
   GLuint xo = dPriv->x;				\
   GLuint yo = dPriv->y;				\
   GLubyte *buf = (GLubyte *) drb->Base.Data;

/* Stencil spans use exactly the same locals as depth spans. */
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Mirror a row index within the drawable; `bottom` comes from the
 * LOCAL_*_VARS macros above (dPriv->h - 1).
 */
#define Y_FLIP(Y) (bottom - (Y))

/* Per-span lock hooks are deliberately empty: the hardware lock is taken
 * once in radeonSpanRenderStart() and released in radeonSpanRenderFinish().
 */
#define HW_LOCK()

#define HW_UNLOCK()

86/* ================================================================
87 * Color buffer
88 */
89
90/* 16 bit, RGB565 color spanline and pixel functions
91 */
92#define SPANTMP_PIXEL_FMT GL_RGB
93#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
94
95#define TAG(x)    radeon##x##_RGB565
96#define TAG2(x,y) radeon##x##_RGB565##y
97#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 2)
98#include "spantmp2.h"
99
100/* 32 bit, ARGB8888 color spanline and pixel functions
101 */
102#define SPANTMP_PIXEL_FMT GL_BGRA
103#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
104
105#define TAG(x)    radeon##x##_ARGB8888
106#define TAG2(x,y) radeon##x##_ARGB8888##y
107#define GET_PTR(X,Y) (buf + ((Y) * drb->flippedPitch + (X)) * 4)
108#include "spantmp2.h"
109
110/* ================================================================
111 * Depth buffer
112 */
113
114/* The Radeon family has depth tiling on all the time, so we have to convert
115 * the x,y coordinates into the memory bus address (mba) in the same
116 * manner as the engine.  In each case, the linear block address (ba)
117 * is calculated, and then wired with x and y to produce the final
118 * memory address.
119 * The chip will do address translation on its own if the surface registers
120 * are set up correctly. It is not quite enough to get it working with hyperz
121 * too...
122 */
123
124static GLuint radeon_mba_z32(const driRenderbuffer * drb, GLint x, GLint y)
125{
126	GLuint pitch = drb->pitch;
127	if (drb->depthHasSurface) {
128		return 4 * (x + y * pitch);
129	} else {
130		GLuint ba, address = 0;	/* a[0..1] = 0           */
131
132#ifdef COMPILE_R300
133		ba = (y / 8) * (pitch / 8) + (x / 8);
134#else
135		ba = (y / 16) * (pitch / 16) + (x / 16);
136#endif
137
138		address |= (x & 0x7) << 2;	/* a[2..4] = x[0..2]     */
139		address |= (y & 0x3) << 5;	/* a[5..6] = y[0..1]     */
140		address |= (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
141		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
142
143		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
144		address |= (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
145		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
146
147		return address;
148	}
149}
150
151static INLINE GLuint
152radeon_mba_z16(const driRenderbuffer * drb, GLint x, GLint y)
153{
154	GLuint pitch = drb->pitch;
155	if (drb->depthHasSurface) {
156		return 2 * (x + y * pitch);
157	} else {
158		GLuint ba, address = 0;	/* a[0]    = 0           */
159
160		ba = (y / 16) * (pitch / 32) + (x / 32);
161
162		address |= (x & 0x7) << 1;	/* a[1..3] = x[0..2]     */
163		address |= (y & 0x7) << 4;	/* a[4..6] = y[0..2]     */
164		address |= (x & 0x8) << 4;	/* a[7]    = x[3]        */
165		address |= (ba & 0x3) << 8;	/* a[8..9] = ba[0..1]    */
166		address |= (y & 0x8) << 7;	/* a[10]   = y[3]        */
167		address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
168		address |= (ba & ~0x3) << 10;	/* a[12..] = ba[2..]     */
169
170		return address;
171	}
172}
173
174/* 16-bit depth buffer functions
175 */
176#define VALUE_TYPE GLushort
177
178#define WRITE_DEPTH( _x, _y, d )					\
179   *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo )) = d;
180
181#define READ_DEPTH( d, _x, _y )						\
182   d = *(GLushort *)(buf + radeon_mba_z16( drb, _x + xo, _y + yo ));
183
184#define TAG(x) radeon##x##_z16
185#include "depthtmp.h"
186
187/* 24 bit depth, 8 bit stencil depthbuffer functions
188 *
189 * Careful: It looks like the R300 uses ZZZS byte order while the R200
190 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
191 */
192#define VALUE_TYPE GLuint
193
194#ifdef COMPILE_R300
195#define WRITE_DEPTH( _x, _y, d )					\
196do {									\
197   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
198   GLuint tmp = *(GLuint *)(buf + offset);				\
199   tmp &= 0x000000ff;							\
200   tmp |= ((d << 8) & 0xffffff00);					\
201   *(GLuint *)(buf + offset) = tmp;					\
202} while (0)
203#else
204#define WRITE_DEPTH( _x, _y, d )					\
205do {									\
206   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
207   GLuint tmp = *(GLuint *)(buf + offset);				\
208   tmp &= 0xff000000;							\
209   tmp |= ((d) & 0x00ffffff);						\
210   *(GLuint *)(buf + offset) = tmp;					\
211} while (0)
212#endif
213
214#ifdef COMPILE_R300
215#define READ_DEPTH( d, _x, _y )						\
216  do { \
217    d = (*(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,		\
218					 _y + yo )) & 0xffffff00) >> 8; \
219  }while(0)
220#else
221#define READ_DEPTH( d, _x, _y )						\
222   d = *(GLuint *)(buf + radeon_mba_z32( drb, _x + xo,			\
223					 _y + yo )) & 0x00ffffff;
224#endif
225
226#define TAG(x) radeon##x##_z24_s8
227#include "depthtmp.h"
228
229/* ================================================================
230 * Stencil buffer
231 */
232
233/* 24 bit depth, 8 bit stencil depthbuffer functions
234 */
235#ifdef COMPILE_R300
236#define WRITE_STENCIL( _x, _y, d )					\
237do {									\
238   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
239   GLuint tmp = *(GLuint *)(buf + offset);				\
240   tmp &= 0xffffff00;							\
241   tmp |= (d) & 0xff;							\
242   *(GLuint *)(buf + offset) = tmp;					\
243} while (0)
244#else
245#define WRITE_STENCIL( _x, _y, d )					\
246do {									\
247   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
248   GLuint tmp = *(GLuint *)(buf + offset);				\
249   tmp &= 0x00ffffff;							\
250   tmp |= (((d) & 0xff) << 24);						\
251   *(GLuint *)(buf + offset) = tmp;					\
252} while (0)
253#endif
254
255#ifdef COMPILE_R300
256#define READ_STENCIL( d, _x, _y )					\
257do {									\
258   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
259   GLuint tmp = *(GLuint *)(buf + offset);				\
260   d = tmp & 0x000000ff;						\
261} while (0)
262#else
263#define READ_STENCIL( d, _x, _y )					\
264do {									\
265   GLuint offset = radeon_mba_z32( drb, _x + xo, _y + yo );		\
266   GLuint tmp = *(GLuint *)(buf + offset);				\
267   d = (tmp & 0xff000000) >> 24;					\
268} while (0)
269#endif
270
271#define TAG(x) radeon##x##_z24_s8
272#include "stenciltmp.h"
273
274/* Move locking out to get reasonable span performance (10x better
275 * than doing this in HW_LOCK above).  WaitForIdle() is the main
276 * culprit.
277 */
278
279static void radeonSpanRenderStart(GLcontext * ctx)
280{
281	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
282#ifdef COMPILE_R300
283	r300ContextPtr r300 = (r300ContextPtr) rmesa;
284	R300_FIREVERTICES(r300);
285#else
286	r100ContextPtr r100 = (r100ContextPtr) rmesa;
287	RADEON_FIREVERTICES(r100);
288#endif
289	LOCK_HARDWARE(rmesa);
290	radeonWaitForIdleLocked(rmesa);
291}
292
293static void radeonSpanRenderFinish(GLcontext * ctx)
294{
295	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
296	_swrast_flush(ctx);
297	UNLOCK_HARDWARE(rmesa);
298}
299
300void radeonInitSpanFuncs(GLcontext * ctx)
301{
302	struct swrast_device_driver *swdd =
303	    _swrast_GetDeviceDriverReference(ctx);
304	swdd->SpanRenderStart = radeonSpanRenderStart;
305	swdd->SpanRenderFinish = radeonSpanRenderFinish;
306}
307
308/**
309 * Plug in the Get/Put routines for the given driRenderbuffer.
310 */
311void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
312{
313	if (rrb->base.InternalFormat == GL_RGB5) {
314		radeonInitPointers_RGB565(&rrb->base);
315	} else if (rrb->base.InternalFormat == GL_RGBA8) {
316		radeonInitPointers_ARGB8888(&rrb->base);
317	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT16) {
318		radeonInitDepthPointers_z16(&rrb->base);
319	} else if (rrb->base.InternalFormat == GL_DEPTH_COMPONENT24) {
320		radeonInitDepthPointers_z24_s8(&rrb->base);
321	} else if (rrb->base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
322		radeonInitStencilPointers_z24_s8(&rrb->base);
323	}
324}
325