radeon_span.c revision e4b2356c07d31fbeeabb13b2fb47db703b473080
1/* $XFree86: xc/lib/GL/mesa/src/drv/radeon/radeon_span.c,v 1.6 2002/10/30 12:51:56 alanh Exp $ */
2/**************************************************************************
3
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     VA Linux Systems Inc., Fremont, California.
6
7All Rights Reserved.
8
9Permission is hereby granted, free of charge, to any person obtaining
10a copy of this software and associated documentation files (the
11"Software"), to deal in the Software without restriction, including
12without limitation the rights to use, copy, modify, merge, publish,
13distribute, sublicense, and/or sell copies of the Software, and to
14permit persons to whom the Software is furnished to do so, subject to
15the following conditions:
16
17The above copyright notice and this permission notice (including the
18next paragraph) shall be included in all copies or substantial
19portions of the Software.
20
21THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
23MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
24IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
25LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
26OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
27WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
28
29**************************************************************************/
30
31/*
32 * Authors:
33 *   Kevin E. Martin <martin@valinux.com>
34 *   Gareth Hughes <gareth@valinux.com>
35 *   Keith Whitwell <keith@tungstengraphics.com>
36 *
37 */
38
39#include "glheader.h"
40#include "swrast/swrast.h"
41
42#include "radeon_context.h"
43#include "radeon_ioctl.h"
44#include "radeon_state.h"
45#include "radeon_span.h"
46#include "radeon_tex.h"
47
48#define DBG 0
49
/*
 * Locals declared at the top of every color span function generated from
 * spantmp.h.  A GLcontext pointer named `ctx' must be in scope.
 *
 *   rmesa, radeonScreen, sPriv, dPriv
 *              - driver context, screen record, DRI screen and DRI drawable
 *   pitch      - bytes per framebuffer row: frontPitch scaled by cpp
 *   height     - drawable height (consumed by Y_FLIP)
 *   buf        - write base: framebuffer mapping + current draw offset,
 *                advanced to the drawable's (x, y) origin
 *   read_buf   - same as buf, but based on the current read offset
 *   p          - scratch pixel value (target of INIT_MONO_PIXEL)
 *
 * The trailing (void) casts silence "unused variable" warnings in generated
 * functions that do not touch every local.  The expansion deliberately has
 * no final semicolon; the use site supplies it.
 */
#define LOCAL_VARS							\
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
   radeonScreenPtr radeonScreen = rmesa->radeonScreen;			\
   __DRIscreenPrivate *sPriv = rmesa->dri.screen;			\
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;			\
   GLuint pitch = radeonScreen->frontPitch * radeonScreen->cpp;		\
   GLuint height = dPriv->h;						\
   char *buf = (char *)(sPriv->pFB +					\
			rmesa->state.color.drawOffset +			\
			(dPriv->x * radeonScreen->cpp) +		\
			(dPriv->y * pitch));				\
   char *read_buf = (char *)(sPriv->pFB +				\
			     rmesa->state.pixel.readOffset +		\
			     (dPriv->x * radeonScreen->cpp) +		\
			     (dPriv->y * pitch));			\
   GLuint p;								\
   (void) read_buf; (void) buf; (void) p
67
/*
 * Locals declared at the top of every depth span function.  A GLcontext
 * pointer named `ctx' must be in scope.
 *
 *   height  - drawable height (consumed by Y_FLIP)
 *   xo, yo  - drawable origin; the depth/stencil access macros add these
 *             to the drawable-relative coordinates before computing the
 *             address
 *   buf     - start of the depth buffer inside the framebuffer mapping
 *             (pFB + depthOffset); unlike the color path, no drawable
 *             origin is folded in here
 *
 * The expansion has no final semicolon; the use site supplies it.
 */
#define LOCAL_DEPTH_VARS						\
   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);			\
   radeonScreenPtr radeonScreen = rmesa->radeonScreen;			\
   __DRIscreenPrivate *sPriv = rmesa->dri.screen;			\
   __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;			\
   GLuint height = dPriv->h;						\
   GLuint xo = dPriv->x;						\
   GLuint yo = dPriv->y;						\
   char *buf = (char *)(sPriv->pFB + radeonScreen->depthOffset);	\
   (void) buf

/* The stencil functions address the same buffer as the depth functions,
 * so they use exactly the same set of locals.
 */
#define LOCAL_STENCIL_VARS	LOCAL_DEPTH_VARS
80
81
/* Non-zero when pixel (_x, _y) lies inside the current clip rectangle,
 * i.e. minx <= _x < maxx and miny <= _y < maxy.  The four bounds are taken
 * from the enclosing scope (HW_CLIPLOOP declares them).
 *
 * Both arguments are parenthesized so that low-precedence expressions such
 * as `x & 7' are compared as a whole; each argument is evaluated twice.
 */
#define CLIPPIXEL( _x, _y )						\
   (((_x) >= minx) && ((_x) < maxx) && ((_y) >= miny) && ((_y) < maxy))
84
85
/* Clip the horizontal span of _n pixels starting at (_x, _y) against the
 * current clip rectangle (minx/maxx/miny/maxy from the enclosing scope).
 *
 *   _x1  receives the first surviving x coordinate
 *   _n1  receives the clipped length; it is 0 when the row is outside the
 *        rectangle and may be zero or negative when the span misses the
 *        rectangle horizontally
 *   _i   is advanced by the number of pixels dropped on the left, so it
 *        can index the caller's per-pixel arrays
 *
 * Only the macro's own parameters are referenced; the earlier definition
 * used the caller's `x' and `n1' directly and therefore only worked when
 * the arguments had exactly those names.
 *
 * The expansion is a bare if/else statement, so do not use it as the
 * unbraced body of another `if'.
 */
#define CLIPSPAN( _x, _y, _n, _x1, _n1, _i )				\
   if ( (_y) < miny || (_y) >= maxy ) {					\
      (_n1) = 0, (_x1) = (_x);						\
   } else {								\
      (_n1) = (_n);							\
      (_x1) = (_x);							\
      if ( (_x1) < minx )						\
	 (_i) += (minx - (_x1)), (_n1) -= (minx - (_x1)), (_x1) = minx; \
      if ( (_x1) + (_n1) >= maxx )					\
	 (_n1) -= ((_x1) + (_n1) - maxx);				\
   }
95
/* Mirror a row index vertically within the drawable: row 0 maps to
 * height - 1 and vice versa.  `height' comes from the enclosing scope.
 * The argument is parenthesized so that Y_FLIP(a - b) flips (a - b).
 */
#define Y_FLIP( _y )		(height - (_y) - 1)
97
98
/* Per-span locking hooks.  Both expand to nothing here: the hardware lock
 * is taken in radeonSpanRenderStart() and released in
 * radeonSpanRenderFinish() instead.
 */
#define HW_LOCK()

/* Open a loop over the drawable's clip rectangles, visiting them from the
 * last entry to the first.  For each rectangle the loop body sees
 * minx/miny/maxx/maxy, already translated so that they are relative to the
 * drawable's origin.  `rmesa' must be in scope.
 *
 * The expansion leaves a `do {' and a `while {' open; HW_ENDCLIPLOOP()
 * must follow the loop body to close them.
 */
#define HW_CLIPLOOP()							\
   do {									\
      __DRIdrawablePrivate *dPriv = rmesa->dri.drawable;		\
      int _nc = dPriv->numClipRects;					\
									\
      while ( _nc-- ) {							\
	 int minx = dPriv->pClipRects[_nc].x1 - dPriv->x;		\
	 int miny = dPriv->pClipRects[_nc].y1 - dPriv->y;		\
	 int maxx = dPriv->pClipRects[_nc].x2 - dPriv->x;		\
	 int maxy = dPriv->pClipRects[_nc].y2 - dPriv->y;

/* Close the two scopes opened by HW_CLIPLOOP(). */
#define HW_ENDCLIPLOOP()						\
      }									\
   } while (0)

/* Counterpart of HW_LOCK(); also expands to nothing. */
#define HW_UNLOCK()
117
118
119
120/* ================================================================
121 * Color buffer
122 */
123
/* 16 bit, RGB565 color spanline and pixel functions.
 *
 * These macros parameterize the spantmp.h template included right after
 * them.  A pixel is one GLushort: red in bits 11-15, green in bits 5-10,
 * blue in bits 0-4; alpha is not stored.  `buf', `read_buf' and `pitch'
 * (bytes per row) come from LOCAL_VARS.
 *
 * All macro arguments are parenthesized so that expression arguments
 * (e.g. `x + i') are scaled and masked as a whole.
 */
#define INIT_MONO_PIXEL(p, color) \
  (p) = PACK_COLOR_565( (color)[0], (color)[1], (color)[2] )

/* Pack r/g/b down to 5/6/5 bits and store the pixel; `a' is ignored. */
#define WRITE_RGBA( _x, _y, r, g, b, a )				\
   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) =				\
      ((((int)(r) & 0xf8) << 8) |					\
       (((int)(g) & 0xfc) << 3) |					\
       (((int)(b) & 0xf8) >> 3))

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p )					\
   *(GLushort *)(buf + (_x)*2 + (_y)*pitch) = (p)

/* Fetch a pixel and expand each channel to the 0..255 range; the
 * `* 255 / max' scaling maps a fully set channel to exactly 255.
 * Alpha always reads back as 0xff.
 */
#define READ_RGBA( rgba, _x, _y )					\
   do {									\
      GLushort p = *(GLushort *)(read_buf + (_x)*2 + (_y)*pitch);	\
      (rgba)[0] = ((p >> 8) & 0xf8) * 255 / 0xf8;			\
      (rgba)[1] = ((p >> 3) & 0xfc) * 255 / 0xfc;			\
      (rgba)[2] = ((p << 3) & 0xf8) * 255 / 0xf8;			\
      (rgba)[3] = 0xff;							\
   } while (0)

#define TAG(x) radeon##x##_RGB565
146#define TAG(x) radeon##x##_RGB565
147#include "spantmp.h"
148
/* 32 bit, ARGB8888 color spanline and pixel functions.
 *
 * These macros parameterize the spantmp.h template included right after
 * them.  A pixel is one GLuint: alpha in bits 24-31, red in bits 16-23,
 * green in bits 8-15, blue in bits 0-7.  `buf', `read_buf' and `pitch'
 * (bytes per row) come from LOCAL_VARS.
 */
#undef INIT_MONO_PIXEL
#define INIT_MONO_PIXEL(p, color) \
  (p) = PACK_COLOR_8888( (color)[3], (color)[0], (color)[1], (color)[2] )

/* The channels are widened to GLuint before shifting: a byte-sized alpha
 * promotes to (signed) int, and shifting a value >= 0x80 left by 24 would
 * overflow it.
 */
#define WRITE_RGBA( _x, _y, r, g, b, a )			\
do {								\
   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) =			\
      (((GLuint)(b) <<  0) |					\
       ((GLuint)(g) <<  8) |					\
       ((GLuint)(r) << 16) |					\
       ((GLuint)(a) << 24));					\
} while (0)

/* Store an already packed pixel value. */
#define WRITE_PIXEL( _x, _y, p ) 			\
do {							\
   *(GLuint *)(buf + (_x)*4 + (_y)*pitch) = (p);	\
} while (0)

/* Fetch a pixel through a volatile pointer and split it into channels. */
#define READ_RGBA( rgba, _x, _y )				\
do {								\
   volatile GLuint *ptr =					\
      (volatile GLuint *)(read_buf + (_x)*4 + (_y)*pitch);	\
   GLuint p = *ptr;						\
   (rgba)[0] = (p >> 16) & 0xff;				\
   (rgba)[1] = (p >>  8) & 0xff;				\
   (rgba)[2] = (p >>  0) & 0xff;				\
   (rgba)[3] = (p >> 24) & 0xff;				\
} while (0)

#define TAG(x) radeon##x##_ARGB8888
178#define TAG(x) radeon##x##_ARGB8888
179#include "spantmp.h"
180
181
182
183/* ================================================================
184 * Depth buffer
185 */
186
187/* The Radeon family has depth tiling on all the time, so we have to convert
188 * the x,y coordinates into the memory bus address (mba) in the same
189 * manner as the engine.  In each case, the linear block address (ba)
190 * is calculated, and then wired with x and y to produce the final
191 * memory address.
192 * The chip will do address translation on its own if the surface registers
193 * are set up correctly. It is not quite enough to get it working with hyperz too...
194 */
195
196static GLuint radeon_mba_z32( radeonContextPtr rmesa,
197				       GLint x, GLint y )
198{
199   GLuint pitch = rmesa->radeonScreen->frontPitch;
200   if (rmesa->radeonScreen->depthHasSurface) {
201      return 4*(x + y*pitch);
202   }
203   else {
204      GLuint ba, address = 0;			/* a[0..1] = 0           */
205
206      ba = (y / 16) * (pitch / 16) + (x / 16);
207
208      address |= (x & 0x7) << 2;			/* a[2..4] = x[0..2]     */
209      address |= (y & 0x3) << 5;			/* a[5..6] = y[0..1]     */
210      address |=
211         (((x & 0x10) >> 2) ^ (y & 0x4)) << 5;	/* a[7]    = x[4] ^ y[2] */
212      address |= (ba & 0x3) << 8;			/* a[8..9] = ba[0..1]    */
213
214      address |= (y & 0x8) << 7;			/* a[10]   = y[3]        */
215      address |=
216         (((x & 0x8) << 1) ^ (y & 0x10)) << 7;	/* a[11]   = x[3] ^ y[4] */
217      address |= (ba & ~0x3) << 10;		/* a[12..] = ba[2..]     */
218
219      return address;
220   }
221}
222
223static __inline GLuint radeon_mba_z16( radeonContextPtr rmesa, GLint x, GLint y )
224{
225   GLuint pitch = rmesa->radeonScreen->frontPitch;
226   if (rmesa->radeonScreen->depthHasSurface) {
227      return 2*(x + y*pitch);
228   }
229   else {
230      GLuint ba, address = 0;			/* a[0]    = 0           */
231
232      ba = (y / 16) * (pitch / 32) + (x / 32);
233
234      address |= (x & 0x7) << 1;			/* a[1..3] = x[0..2]     */
235      address |= (y & 0x7) << 4;			/* a[4..6] = y[0..2]     */
236      address |= (x & 0x8) << 4;			/* a[7]    = x[3]        */
237      address |= (ba & 0x3) << 8;			/* a[8..9] = ba[0..1]    */
238      address |= (y & 0x8) << 7;			/* a[10]   = y[3]        */
239      address |= ((x & 0x10) ^ (y & 0x10)) << 7;	/* a[11]   = x[4] ^ y[4] */
240      address |= (ba & ~0x3) << 10;		/* a[12..] = ba[2..]     */
241
242      return address;
243   }
244}
245
246
/* 16-bit depth buffer functions.
 *
 * These macros parameterize the depthtmp.h template included right after
 * them.  Coordinates are drawable-relative; the drawable origin (xo, yo)
 * from LOCAL_DEPTH_VARS is added before radeon_mba_z16() turns them into
 * a byte offset from `buf'.
 */

/* Store the low 16 bits of d.  Wrapped in do/while (0), like the 24/8
 * variant below, so that it behaves as a single statement and the use
 * site supplies the semicolon.
 */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   *(GLushort *)(buf +							\
		 radeon_mba_z16( rmesa, (_x) + xo, (_y) + yo )) = (d);	\
} while (0)

/* Load one depth value into d.  The expansion keeps its own trailing
 * semicolon, as the original definition did; the call sites in depthtmp.h
 * are not visible from this file.
 */
#define READ_DEPTH( d, _x, _y )						\
   (d) = *(GLushort *)(buf +						\
		       radeon_mba_z16( rmesa, (_x) + xo, (_y) + yo ));

#define TAG(x) radeon##x##_16
255#define TAG(x) radeon##x##_16
256#include "depthtmp.h"
257
/* 24 bit depth, 8 bit stencil depthbuffer functions.
 *
 * These macros parameterize the depthtmp.h template included right after
 * them.  Each pixel is one GLuint with depth in bits 0-23 and stencil in
 * bits 24-31.  Coordinates are drawable-relative; the drawable origin
 * (xo, yo) from LOCAL_DEPTH_VARS is added before radeon_mba_z32() turns
 * them into a byte offset from `buf'.
 */

/* Read-modify-write: replace the low 24 depth bits and keep the stencil
 * byte that shares the word.
 */
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
   GLuint offset = radeon_mba_z32( rmesa, (_x) + xo, (_y) + yo );	\
   GLuint tmp = *(GLuint *)(buf + offset);				\
   tmp &= 0xff000000;							\
   tmp |= ((d) & 0x00ffffff);						\
   *(GLuint *)(buf + offset) = tmp;					\
} while (0)

/* Load the 24 depth bits into d, masking off the stencil byte.  The
 * expansion keeps its own trailing semicolon, as the original definition
 * did; the call sites in depthtmp.h are not visible from this file.
 */
#define READ_DEPTH( d, _x, _y )						\
   (d) = *(GLuint *)(buf + radeon_mba_z32( rmesa, (_x) + xo,		\
					   (_y) + yo )) & 0x00ffffff;

#define TAG(x) radeon##x##_24_8
273#define TAG(x) radeon##x##_24_8
274#include "depthtmp.h"
275
276
277/* ================================================================
278 * Stencil buffer
279 */
280
/* 24 bit depth, 8 bit stencil depthbuffer functions.
 *
 * These macros parameterize the stenciltmp.h template included right after
 * them.  The stencil value lives in bits 24-31 of the same GLuint that
 * holds the 24-bit depth value, so addresses come from radeon_mba_z32().
 * Coordinates are drawable-relative; (xo, yo) from LOCAL_STENCIL_VARS is
 * added before the address is computed.
 */

/* Read-modify-write: replace the stencil byte and keep the depth bits.
 * d is widened to GLuint before the shift so that values >= 0x80 are not
 * shifted into the sign bit of an int.
 */
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
   GLuint offset = radeon_mba_z32( rmesa, (_x) + xo, (_y) + yo );	\
   GLuint tmp = *(GLuint *)(buf + offset);				\
   tmp &= 0x00ffffff;							\
   tmp |= (((GLuint)(d) & 0xff) << 24);					\
   *(GLuint *)(buf + offset) = tmp;					\
} while (0)

/* Load the stencil byte (bits 24-31 of the word) into d. */
#define READ_STENCIL( d, _x, _y )					\
do {									\
   GLuint offset = radeon_mba_z32( rmesa, (_x) + xo, (_y) + yo );	\
   GLuint tmp = *(GLuint *)(buf + offset);				\
   tmp &= 0xff000000;							\
   (d) = tmp >> 24;							\
} while (0)

#define TAG(x) radeon##x##_24_8
300#define TAG(x) radeon##x##_24_8
301#include "stenciltmp.h"
302
303
304/*
305 * This function is called to specify which buffer to read and write
306 * for software rasterization (swrast) fallbacks.  This doesn't necessarily
307 * correspond to glDrawBuffer() or glReadBuffer() calls.
308 */
309static void radeonSetBuffer( GLcontext *ctx,
310                             GLframebuffer *colorBuffer,
311                             GLuint bufferBit )
312{
313   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
314
315   switch ( bufferBit ) {
316   case BUFFER_BIT_FRONT_LEFT:
317      if ( rmesa->sarea->pfCurrentPage == 1 ) {
318        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
319        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
320        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
321        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
322      } else {
323      	rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
324      	rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
325      	rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
326      	rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
327      }
328      break;
329   case BUFFER_BIT_BACK_LEFT:
330      if ( rmesa->sarea->pfCurrentPage == 1 ) {
331      	rmesa->state.pixel.readOffset = rmesa->radeonScreen->frontOffset;
332      	rmesa->state.pixel.readPitch  = rmesa->radeonScreen->frontPitch;
333      	rmesa->state.color.drawOffset = rmesa->radeonScreen->frontOffset;
334      	rmesa->state.color.drawPitch  = rmesa->radeonScreen->frontPitch;
335      } else {
336        rmesa->state.pixel.readOffset = rmesa->radeonScreen->backOffset;
337        rmesa->state.pixel.readPitch  = rmesa->radeonScreen->backPitch;
338        rmesa->state.color.drawOffset = rmesa->radeonScreen->backOffset;
339        rmesa->state.color.drawPitch  = rmesa->radeonScreen->backPitch;
340      }
341      break;
342   default:
343      assert(0);
344      break;
345   }
346}
347
348/* Move locking out to get reasonable span performance (10x better
349 * than doing this in HW_LOCK above).  WaitForIdle() is the main
350 * culprit.
351 */
352
353static void radeonSpanRenderStart( GLcontext *ctx )
354{
355   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
356
357   RADEON_FIREVERTICES( rmesa );
358   LOCK_HARDWARE( rmesa );
359   radeonWaitForIdleLocked( rmesa );
360}
361
362static void radeonSpanRenderFinish( GLcontext *ctx )
363{
364   radeonContextPtr rmesa = RADEON_CONTEXT( ctx );
365   _swrast_flush( ctx );
366   UNLOCK_HARDWARE( rmesa );
367}
368
369void radeonInitSpanFuncs( GLcontext *ctx )
370{
371   radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
372   struct swrast_device_driver *swdd = _swrast_GetDeviceDriverReference(ctx);
373
374   swdd->SetBuffer = radeonSetBuffer;
375
376   switch ( rmesa->radeonScreen->cpp ) {
377   case 2:
378#if 0
379      swdd->WriteRGBASpan	= radeonWriteRGBASpan_RGB565;
380      swdd->WriteRGBSpan	= radeonWriteRGBSpan_RGB565;
381      swdd->WriteMonoRGBASpan	= radeonWriteMonoRGBASpan_RGB565;
382      swdd->WriteRGBAPixels	= radeonWriteRGBAPixels_RGB565;
383      swdd->WriteMonoRGBAPixels	= radeonWriteMonoRGBAPixels_RGB565;
384      swdd->ReadRGBASpan	= radeonReadRGBASpan_RGB565;
385      swdd->ReadRGBAPixels      = radeonReadRGBAPixels_RGB565;
386#endif
387      break;
388
389   case 4:
390#if 0
391      swdd->WriteRGBASpan	= radeonWriteRGBASpan_ARGB8888;
392      swdd->WriteRGBSpan	= radeonWriteRGBSpan_ARGB8888;
393      swdd->WriteMonoRGBASpan   = radeonWriteMonoRGBASpan_ARGB8888;
394      swdd->WriteRGBAPixels     = radeonWriteRGBAPixels_ARGB8888;
395      swdd->WriteMonoRGBAPixels = radeonWriteMonoRGBAPixels_ARGB8888;
396      swdd->ReadRGBASpan	= radeonReadRGBASpan_ARGB8888;
397      swdd->ReadRGBAPixels      = radeonReadRGBAPixels_ARGB8888;
398#endif
399      break;
400
401   default:
402      break;
403   }
404
405   switch ( rmesa->glCtx->Visual.depthBits ) {
406   case 16:
407#if 0
408      swdd->ReadDepthSpan	= radeonReadDepthSpan_16;
409      swdd->WriteDepthSpan	= radeonWriteDepthSpan_16;
410      swdd->ReadDepthPixels	= radeonReadDepthPixels_16;
411      swdd->WriteDepthPixels	= radeonWriteDepthPixels_16;
412#endif
413      break;
414
415   case 24:
416#if 0
417      swdd->ReadDepthSpan	= radeonReadDepthSpan_24_8;
418      swdd->WriteDepthSpan	= radeonWriteDepthSpan_24_8;
419      swdd->ReadDepthPixels	= radeonReadDepthPixels_24_8;
420      swdd->WriteDepthPixels	= radeonWriteDepthPixels_24_8;
421
422      swdd->ReadStencilSpan	= radeonReadStencilSpan_24_8;
423      swdd->WriteStencilSpan	= radeonWriteStencilSpan_24_8;
424      swdd->ReadStencilPixels	= radeonReadStencilPixels_24_8;
425      swdd->WriteStencilPixels	= radeonWriteStencilPixels_24_8;
426#endif
427      break;
428
429   default:
430      break;
431   }
432
433   swdd->SpanRenderStart          = radeonSpanRenderStart;
434   swdd->SpanRenderFinish         = radeonSpanRenderFinish;
435}
436
437
438/**
439 * Plug in the Get/Put routines for the given driRenderbuffer.
440 */
441void
442radeonSetSpanFunctions(driRenderbuffer *drb, const GLvisual *vis)
443{
444   if (drb->Base.InternalFormat == GL_RGBA) {
445      if (vis->redBits == 5 && vis->greenBits == 6 && vis->blueBits == 5) {
446         drb->Base.GetRow        = radeonReadRGBASpan_RGB565;
447         drb->Base.GetValues     = radeonReadRGBAPixels_RGB565;
448         drb->Base.PutRow        = radeonWriteRGBASpan_RGB565;
449         drb->Base.PutRowRGB     = radeonWriteRGBSpan_RGB565;
450         drb->Base.PutMonoRow    = radeonWriteMonoRGBASpan_RGB565;
451         drb->Base.PutValues     = radeonWriteRGBAPixels_RGB565;
452         drb->Base.PutMonoValues = radeonWriteMonoRGBAPixels_RGB565;
453      }
454      else {
455         drb->Base.GetRow        = radeonReadRGBASpan_ARGB8888;
456         drb->Base.GetValues     = radeonReadRGBAPixels_ARGB8888;
457         drb->Base.PutRow        = radeonWriteRGBASpan_ARGB8888;
458         drb->Base.PutRowRGB     = radeonWriteRGBSpan_ARGB8888;
459         drb->Base.PutMonoRow    = radeonWriteMonoRGBASpan_ARGB8888;
460         drb->Base.PutValues     = radeonWriteRGBAPixels_ARGB8888;
461         drb->Base.PutMonoValues = radeonWriteMonoRGBAPixels_ARGB8888;
462      }
463   }
464   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT16) {
465      drb->Base.GetRow        = radeonReadDepthSpan_16;
466      drb->Base.GetValues     = radeonReadDepthPixels_16;
467      drb->Base.PutRow        = radeonWriteDepthSpan_16;
468      drb->Base.PutMonoRow    = radeonWriteMonoDepthSpan_16;
469      drb->Base.PutValues     = radeonWriteDepthPixels_16;
470      drb->Base.PutMonoValues = NULL;
471   }
472   else if (drb->Base.InternalFormat == GL_DEPTH_COMPONENT24) {
473      drb->Base.GetRow        = radeonReadDepthSpan_24_8;
474      drb->Base.GetValues     = radeonReadDepthPixels_24_8;
475      drb->Base.PutRow        = radeonWriteDepthSpan_24_8;
476      drb->Base.PutMonoRow    = radeonWriteMonoDepthSpan_24_8;
477      drb->Base.PutValues     = radeonWriteDepthPixels_24_8;
478      drb->Base.PutMonoValues = NULL;
479   }
480   else if (drb->Base.InternalFormat == GL_STENCIL_INDEX8_EXT) {
481      drb->Base.GetRow        = radeonReadStencilSpan_24_8;
482      drb->Base.GetValues     = radeonReadStencilPixels_24_8;
483      drb->Base.PutRow        = radeonWriteStencilSpan_24_8;
484      drb->Base.PutMonoRow    = radeonWriteMonoStencilSpan_24_8;
485      drb->Base.PutValues     = radeonWriteStencilPixels_24_8;
486      drb->Base.PutMonoValues = NULL;
487   }
488}
489