radeon_span.c revision 45e76d2665b38ba3787548310efc59e969124c01
1/**************************************************************************
2
3Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     VA Linux Systems Inc., Fremont, California.
6
7The Weather Channel (TM) funded Tungsten Graphics to develop the
8initial release of the Radeon 8500 driver under the XFree86 license.
9This notice must be preserved.
10
11All Rights Reserved.
12
13Permission is hereby granted, free of charge, to any person obtaining
14a copy of this software and associated documentation files (the
15"Software"), to deal in the Software without restriction, including
16without limitation the rights to use, copy, modify, merge, publish,
17distribute, sublicense, and/or sell copies of the Software, and to
18permit persons to whom the Software is furnished to do so, subject to
19the following conditions:
20
21The above copyright notice and this permission notice (including the
22next paragraph) shall be included in all copies or substantial
23portions of the Software.
24
25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33**************************************************************************/
34
35/*
36 * Authors:
37 *   Kevin E. Martin <martin@valinux.com>
38 *   Gareth Hughes <gareth@valinux.com>
39 *   Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43#include "main/glheader.h"
44#include "swrast/swrast.h"
45
46#include "radeon_common.h"
47#include "radeon_lock.h"
48#include "radeon_span.h"
49
50#define DBG 0
51
52static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
53
54
55/* r200 depth buffer is always tiled - this is the formula
56   according to the docs unless I typo'ed in it
57*/
58#if defined(RADEON_R200)
59static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
60				 GLint x, GLint y)
61{
62    GLubyte *ptr = rrb->bo->ptr;
63    GLint offset;
64    if (rrb->has_surface) {
65	offset = x * rrb->cpp + y * rrb->pitch;
66    } else {
67	GLuint b;
68	offset = 0;
69	b = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
70	offset += (b >> 1) << 12;
71	offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
72	offset += ((y >> 2) & 0x3) << 9;
73	offset += ((x >> 3) & 0x1) << 8;
74	offset += ((x >> 4) & 0x3) << 6;
75	offset += ((x >> 2) & 0x1) << 5;
76	offset += ((y >> 1) & 0x1) << 4;
77	offset += ((x >> 1) & 0x1) << 3;
78	offset += (y & 0x1) << 2;
79	offset += (x & 0x1) << 1;
80    }
81    return &ptr[offset];
82}
83
84static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
85				 GLint x, GLint y)
86{
87    GLubyte *ptr = rrb->bo->ptr;
88    GLint offset;
89    if (rrb->has_surface) {
90	offset = x * rrb->cpp + y * rrb->pitch;
91    } else {
92	GLuint b;
93	offset = 0;
94	b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
95	offset += (b >> 1) << 12;
96	offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
97	offset += ((y >> 2) & 0x3) << 9;
98	offset += ((x >> 2) & 0x1) << 8;
99	offset += ((x >> 3) & 0x3) << 6;
100	offset += ((y >> 1) & 0x1) << 5;
101	offset += ((x >> 1) & 0x1) << 4;
102	offset += (y & 0x1) << 3;
103	offset += (x & 0x1) << 2;
104    }
105    return &ptr[offset];
106}
107#endif
108
109/* r600 tiling
110 * two main types:
111 * - 1D (akin to macro-linear/micro-tiled on older asics)
112 * - 2D (akin to macro-tiled/micro-tiled on older asics)
113 * only 1D tiling is implemented below
114 */
115#if defined(RADEON_R600)
116static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
117					GLint x, GLint y, GLint is_depth, GLint is_stencil)
118{
119    GLint element_bytes = rrb->cpp;
120    GLint num_samples = 1;
121    GLint tile_width = 8;
122    GLint tile_height = 8;
123    GLint tile_thickness = 1;
124    GLint pitch_elements = rrb->pitch / element_bytes;
125    GLint height = rrb->base.Height;
126    GLint z = 0;
127    GLint sample_number = 0;
128    /* */
129    GLint tile_bytes;
130    GLint tiles_per_row;
131    GLint tiles_per_slice;
132    GLint slice_offset;
133    GLint tile_row_index;
134    GLint tile_column_index;
135    GLint tile_offset;
136    GLint pixel_number = 0;
137    GLint element_offset;
138    GLint offset = 0;
139
140    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
141    tiles_per_row = pitch_elements / tile_width;
142    tiles_per_slice = tiles_per_row * (height / tile_height);
143    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
144    tile_row_index = y / tile_height;
145    tile_column_index = x / tile_width;
146    tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
147
148    if (is_depth) {
149	    GLint pixel_offset = 0;
150
151	    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
152	    pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
153	    pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
154	    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
155	    pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
156	    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
157	    switch (element_bytes) {
158	    case 2:
159		    pixel_offset = pixel_number * element_bytes * num_samples;
160		    break;
161	    case 4:
162		    /* stencil and depth data are stored separately within a tile.
163		     * stencil is stored in a contiguous tile before the depth tile.
164		     * stencil element is 1 byte, depth element is 3 bytes.
165		     * stencil tile is 64 bytes.
166		     */
167		    if (is_stencil)
168			    pixel_offset = pixel_number * 1 * num_samples;
169		    else
170			    pixel_offset = (pixel_number * 3 * num_samples) + 64;
171		    break;
172	    }
173	    element_offset = pixel_offset + (sample_number * element_bytes);
174    } else {
175	    GLint sample_offset;
176
177	    switch (element_bytes) {
178	    case 1:
179		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
180		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
181		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
182		    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
183		    pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
184		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
185		    break;
186	    case 2:
187		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
188		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
189		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
190		    pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
191		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
192		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
193		    break;
194	    case 4:
195		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
196		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
197		    pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
198		    pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
199		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
200		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
201		    break;
202	    }
203	    sample_offset = sample_number * (tile_bytes / num_samples);
204	    element_offset = sample_offset + (pixel_number * element_bytes);
205    }
206    offset = slice_offset + tile_offset + element_offset;
207    return offset;
208}
209
210/* depth buffers */
211static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
212			       GLint x, GLint y)
213{
214    GLubyte *ptr = rrb->bo->ptr;
215    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
216    return &ptr[offset];
217}
218
219static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
220				 GLint x, GLint y)
221{
222    GLubyte *ptr = rrb->bo->ptr;
223    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
224    return &ptr[offset];
225}
226
227static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
228			       GLint x, GLint y)
229{
230    GLubyte *ptr = rrb->bo->ptr;
231    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
232    GLint offset;
233
234    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
235        offset = x * rrb->cpp + y * rrb->pitch;
236    } else {
237	    offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
238    }
239    return &ptr[offset];
240}
241
242#else
243
244/* radeon tiling on r300-r500 has 4 states,
245   macro-linear/micro-linear
246   macro-linear/micro-tiled
247   macro-tiled /micro-linear
248   macro-tiled /micro-tiled
249   1 byte surface
250   2 byte surface - two types - we only provide 8x2 microtiling
251   4 byte surface
252   8/16 byte (unused)
253*/
254static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
255			     GLint x, GLint y)
256{
257    GLubyte *ptr = rrb->bo->ptr;
258    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
259    GLint offset;
260
261    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
262        offset = x * rrb->cpp + y * rrb->pitch;
263    } else {
264        offset = 0;
265        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
266	    if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
267		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
268		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
269		offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
270		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
271		offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
272		offset += ((y >> 1) & 0x1) << 6;
273		offset += ((x >> 2) & 0x1) << 5;
274		offset += (y & 1) << 4;
275		offset += (x & 3) << 2;
276            } else {
277		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
278		offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
279		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
280		offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
281		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
282		offset += (y & 1) << 6;
283		offset += (x & 15) << 2;
284            }
285        } else {
286	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
287	    offset += (y & 1) << 4;
288	    offset += (x & 3) << 2;
289        }
290    }
291    return &ptr[offset];
292}
293
294static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
295				     GLint x, GLint y)
296{
297    GLubyte *ptr = rrb->bo->ptr;
298    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
299    GLint offset;
300
301    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
302        offset = x * rrb->cpp + y * rrb->pitch;
303    } else {
304        offset = 0;
305        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
306            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
307		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
308		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
309		offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
310		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
311		offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
312		offset += ((y >> 1) & 0x1) << 6;
313		offset += ((x >> 3) & 0x1) << 5;
314		offset += (y & 1) << 4;
315		offset += (x & 3) << 2;
316            } else {
317		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
318		offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
319		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
320		offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
321		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
322		offset += (y & 1) << 6;
323		offset += ((x >> 4) & 0x1) << 5;
324                offset += (x & 15) << 2;
325            }
326        } else {
327	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
328	    offset += (y & 0x1) << 4;
329	    offset += (x & 0x7) << 1;
330        }
331    }
332    return &ptr[offset];
333}
334
335#endif
336
337#ifndef RADEON_R300
338#ifndef RADEON_R600
/**
 * Repack a 32-bit value from Z24_S8 (depth in the top 24 bits, stencil
 * in the low 8) to S8_Z24 (stencil in the top 8 bits, depth in the low
 * 24).
 */
static uint32_t
z24s8_to_s8z24(uint32_t val)
{
   const uint32_t stencil = val & 0xffu;
   const uint32_t depth = val >> 8;

   return (stencil << 24) | depth;
}
344
/**
 * Repack a 32-bit value from S8_Z24 (stencil in the top 8 bits, depth
 * in the low 24) to Z24_S8 (depth in the top 24 bits, stencil in the
 * low 8).
 */
static uint32_t
s8z24_to_z24s8(uint32_t val)
{
   const uint32_t stencil = val >> 24;
   const uint32_t depth = val & 0x00ffffffu;

   return (depth << 8) | stencil;
}
350#endif
351#endif
352
353/*
354 * Note that all information needed to access pixels in a renderbuffer
355 * should be obtained through the gl_renderbuffer parameter, not per-context
356 * information.
357 */
/* Local declarations for the colour span functions.  Expects `ctx` and
 * `rb` to be in scope at the expansion site.  Declares:
 *   radeon, rrb      - driver context and renderbuffer
 *   yScale, yBias    - Y flip parameters: identity when the draw
 *                      buffer's Name is non-zero, otherwise
 *                      y -> (Height - 1) - y
 *   cliprects, num_cliprects, x_off, y_off
 *                    - filled in by radeon_get_cliprects()
 *   p                - scratch pixel value (may be unused, hence the
 *                      (void) cast)
 * NOTE(review): presumably expanded by the spantmp2.h template
 * included further down -- confirm against that header.
 */
#define LOCAL_VARS						\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;		\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
   GLuint p;						\
   (void)p;						\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* Same as LOCAL_VARS but without the scratch pixel `p`. */
#define LOCAL_DEPTH_VARS				\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;	\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
  radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* Stencil functions use exactly the same locals as depth functions. */
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Apply the Y flip set up by yScale/yBias in the LOCAL_*_VARS macros. */
#define Y_FLIP(_y) ((_y) * yScale + yBias)

/* Both lock hooks are deliberately empty in this file. */
#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 *
 * Opens a `do {` and a `while (_nc--) {` that iterate over the
 * cliprects from last to first, defining minx/miny/maxx/maxy relative
 * to (x_off, y_off).  The two open braces are NOT closed here; the
 * matching close must come from a companion macro (not visible in this
 * file -- presumably supplied by the template headers).
 */
#define HW_CLIPLOOP()							\
   do {									\
      int _nc = num_cliprects;						\
      while ( _nc-- ) {							\
	 int minx = cliprects[_nc].x1 - x_off;				\
	 int miny = cliprects[_nc].y1 - y_off;				\
	 int maxx = cliprects[_nc].x2 - x_off;				\
	 int maxy = cliprects[_nc].y2 - y_off;
400
401/* ================================================================
402 * Color buffer
403 */
404
/* 16 bit, RGB565 color spanline and pixel functions
 *
 * TAG()/TAG2() paste the format suffix onto every generated name
 * (radeon<x>_RGB565).  GET_PTR() supplies the element address: the
 * r600 colour helper on RADEON_R600, the 2-byte 8x2 helper otherwise.
 * x_off/y_off are added to the coordinates here.
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 16 bit, ARGB1555 color spanline and pixel functions
 *
 * Same addressing as RGB565 above; only the pixel format/type and the
 * name suffix differ.  The macros are defined again after the previous
 * include (NOTE(review): presumably spantmp2.h #undefs them -- confirm).
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV

#define TAG(x)    radeon##x##_ARGB1555
#define TAG2(x,y) radeon##x##_ARGB1555##y
#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"

/* 16 bit, ARGB4444 color spanline and pixel functions
 *
 * Same addressing as RGB565 above; only the pixel format/type and the
 * name suffix differ.
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV

#define TAG(x)    radeon##x##_ARGB4444
#define TAG2(x,y) radeon##x##_ARGB4444##y
#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"
446
447/* 32 bit, xRGB8888 color spanline and pixel functions
448 */
449#define SPANTMP_PIXEL_FMT GL_BGRA
450#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
451
452#define TAG(x)    radeon##x##_xRGB8888
453#define TAG2(x,y) radeon##x##_xRGB8888##y
454#if defined(RADEON_R600)
455#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
456#define PUT_VALUE(_x, _y, d) { \
457   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
458   *_ptr = d;								\
459} while (0)
460#else
461#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
462#define PUT_VALUE(_x, _y, d) { \
463   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
464   *_ptr = d;								\
465} while (0)
466#endif
467#include "spantmp2.h"
468
469/* 32 bit, ARGB8888 color spanline and pixel functions
470 */
471#define SPANTMP_PIXEL_FMT GL_BGRA
472#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
473
474#define TAG(x)    radeon##x##_ARGB8888
475#define TAG2(x,y) radeon##x##_ARGB8888##y
476#if defined(RADEON_R600)
477#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
478#define PUT_VALUE(_x, _y, d) { \
479   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
480   *_ptr = d;								\
481} while (0)
482#else
483#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
484#define PUT_VALUE(_x, _y, d) { \
485   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
486   *_ptr = d;								\
487} while (0)
488#endif
489#include "spantmp2.h"
490
491/* ================================================================
492 * Depth buffer
493 */
494
495/* The Radeon family has depth tiling on all the time, so we have to convert
496 * the x,y coordinates into the memory bus address (mba) in the same
497 * manner as the engine.  In each case, the linear block address (ba)
498 * is calculated, and then wired with x and y to produce the final
499 * memory address.
500 * The chip will do address translation on its own if the surface registers
501 * are set up correctly. It is not quite enough to get it working with hyperz
502 * too...
503 */
504
505/* 16-bit depth buffer functions
506 */
507#define VALUE_TYPE GLushort
508
509#if defined(RADEON_R200)
510#define WRITE_DEPTH( _x, _y, d )					\
511   *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
512#elif defined(RADEON_R600)
513#define WRITE_DEPTH( _x, _y, d )					\
514   *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
515#else
516#define WRITE_DEPTH( _x, _y, d )					\
517   *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
518#endif
519
520#if defined(RADEON_R200)
521#define READ_DEPTH( d, _x, _y )						\
522   d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
523#elif defined(RADEON_R600)
524#define READ_DEPTH( d, _x, _y )						\
525   d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
526#else
527#define READ_DEPTH( d, _x, _y )						\
528   d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
529#endif
530
531#define TAG(x) radeon##x##_z16
532#include "depthtmp.h"
533
534/* 24 bit depth
535 *
536 * Careful: It looks like the R300 uses ZZZS byte order while the R200
537 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
538 */
539#define VALUE_TYPE GLuint
540
541#if defined(RADEON_R300)
542#define WRITE_DEPTH( _x, _y, d )					\
543do {									\
544   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
545   GLuint tmp = *_ptr;				\
546   tmp &= 0x000000ff;							\
547   tmp |= ((d << 8) & 0xffffff00);					\
548   *_ptr = tmp;					\
549} while (0)
550#elif defined(RADEON_R600)
551#define WRITE_DEPTH( _x, _y, d )					\
552do {									\
553   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
554   GLuint tmp = *_ptr;				\
555   tmp &= 0xff000000;							\
556   tmp |= ((d) & 0x00ffffff);					\
557   *_ptr = tmp;					\
558} while (0)
559#elif defined(RADEON_R200)
560#define WRITE_DEPTH( _x, _y, d )					\
561do {									\
562   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
563   GLuint tmp = *_ptr;				\
564   tmp &= 0xff000000;							\
565   tmp |= ((d) & 0x00ffffff);						\
566   *_ptr = tmp;					\
567} while (0)
568#else
569#define WRITE_DEPTH( _x, _y, d )					\
570do {									\
571   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
572   GLuint tmp = *_ptr;							\
573   tmp &= 0xff000000;							\
574   tmp |= ((d) & 0x00ffffff);						\
575   *_ptr = tmp;					\
576} while (0)
577#endif
578
579#if defined(RADEON_R300)
580#define READ_DEPTH( d, _x, _y )						\
581  do {									\
582    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
583  }while(0)
584#elif defined(RADEON_R600)
585#define READ_DEPTH( d, _x, _y )						\
586  do {									\
587    d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
588  }while(0)
589#elif defined(RADEON_R200)
590#define READ_DEPTH( d, _x, _y )						\
591  do {									\
592    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
593  }while(0)
594#else
595#define READ_DEPTH( d, _x, _y )	\
596  d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off)) & 0x00ffffff;
597#endif
598
599#define TAG(x) radeon##x##_z24
600#include "depthtmp.h"
601
602/* 24 bit depth, 8 bit stencil depthbuffer functions
603 * EXT_depth_stencil
604 *
605 * Careful: It looks like the R300 uses ZZZS byte order while the R200
606 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
607 */
608#define VALUE_TYPE GLuint
609
610#if defined(RADEON_R300)
611#define WRITE_DEPTH( _x, _y, d )					\
612do {									\
613   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
614   *_ptr = d;								\
615} while (0)
616#elif defined(RADEON_R600)
617#define WRITE_DEPTH( _x, _y, d )					\
618do {									\
619   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
620   GLuint tmp = *_ptr;				\
621   tmp &= 0xff000000;							\
622   tmp |= (((d) >> 8) & 0x00ffffff);					\
623   *_ptr = tmp;					\
624   _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
625   tmp = *_ptr;				\
626   tmp &= 0xffffff00;							\
627   tmp |= (d) & 0xff;							\
628   *_ptr = tmp;					\
629} while (0)
630#elif defined(RADEON_R200)
631#define WRITE_DEPTH( _x, _y, d )					\
632do {									\
633   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
634   GLuint tmp = z24s8_to_s8z24(d);					\
635   *_ptr = tmp;								\
636} while (0)
637#else
638#define WRITE_DEPTH( _x, _y, d )					\
639do {									\
640   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
641   GLuint tmp = z24s8_to_s8z24(d);					\
642   *_ptr = tmp;					\
643} while (0)
644#endif
645
646#if defined(RADEON_R300)
647#define READ_DEPTH( d, _x, _y )						\
648  do { \
649    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));	\
650  }while(0)
651#elif defined(RADEON_R600)
652#define READ_DEPTH( d, _x, _y )						\
653  do { \
654    d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
655    d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff;	\
656  }while(0)
657#elif defined(RADEON_R200)
658#define READ_DEPTH( d, _x, _y )						\
659  do { \
660    d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)));	\
661  }while(0)
662#else
663#define READ_DEPTH( d, _x, _y )	do {					\
664    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off ))); \
665  } while (0)
666#endif
667
668#define TAG(x) radeon##x##_z24_s8
669#include "depthtmp.h"
670
671/* ================================================================
672 * Stencil buffer
673 */
674
675/* 24 bit depth, 8 bit stencil depthbuffer functions
676 */
677#ifdef RADEON_R300
678#define WRITE_STENCIL( _x, _y, d )					\
679do {									\
680   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
681   GLuint tmp = *_ptr;				\
682   tmp &= 0xffffff00;							\
683   tmp |= (d) & 0xff;							\
684   *_ptr = tmp;					\
685} while (0)
686#elif defined(RADEON_R600)
687#define WRITE_STENCIL( _x, _y, d )					\
688do {									\
689   GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
690   GLuint tmp = *_ptr;				\
691   tmp &= 0xffffff00;							\
692   tmp |= (d) & 0xff;							\
693   *_ptr = tmp;					\
694} while (0)
695#elif defined(RADEON_R200)
696#define WRITE_STENCIL( _x, _y, d )					\
697do {									\
698   GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);		\
699   GLuint tmp = *_ptr;				\
700   tmp &= 0x00ffffff;							\
701   tmp |= (((d) & 0xff) << 24);						\
702   *_ptr = tmp;					\
703} while (0)
704#else
705#define WRITE_STENCIL( _x, _y, d )					\
706do {									\
707   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
708   GLuint tmp = *_ptr;				\
709   tmp &= 0x00ffffff;							\
710   tmp |= (((d) & 0xff) << 24);						\
711   *_ptr = tmp;					\
712} while (0)
713#endif
714
715#ifdef RADEON_R300
716#define READ_STENCIL( d, _x, _y )					\
717do {									\
718   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
719   GLuint tmp = *_ptr;				\
720   d = tmp & 0x000000ff;						\
721} while (0)
722#elif defined(RADEON_R600)
723#define READ_STENCIL( d, _x, _y )					\
724do {									\
725   GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off );		\
726   GLuint tmp = *_ptr;				\
727   d = tmp & 0x000000ff;						\
728} while (0)
729#elif defined(RADEON_R200)
730#define READ_STENCIL( d, _x, _y )					\
731do {									\
732   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
733   GLuint tmp = *_ptr;				\
734   d = (tmp & 0xff000000) >> 24;					\
735} while (0)
736#else
737#define READ_STENCIL( d, _x, _y )					\
738do {									\
739   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
740   GLuint tmp = *_ptr;				\
741   d = (tmp & 0xff000000) >> 24;					\
742} while (0)
743#endif
744
745#define TAG(x) radeon##x##_z24_s8
746#include "stenciltmp.h"
747
748
749static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
750{
751	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
752	int r;
753
754	if (rrb == NULL || !rrb->bo)
755		return;
756
757	if (flag) {
758		if (rrb->bo->bom->funcs->bo_wait)
759			radeon_bo_wait(rrb->bo);
760		r = radeon_bo_map(rrb->bo, 1);
761		if (r) {
762			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
763				__FUNCTION__, r);
764		}
765
766		radeonSetSpanFunctions(rrb);
767	} else {
768		radeon_bo_unmap(rrb->bo);
769		rb->GetRow = NULL;
770		rb->PutRow = NULL;
771	}
772}
773
774static void
775radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
776{
777	GLuint i, j;
778
779	/* color draw buffers */
780	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
781		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
782
783	/* check for render to textures */
784	for (i = 0; i < BUFFER_COUNT; i++) {
785		struct gl_renderbuffer_attachment *att =
786			ctx->DrawBuffer->Attachment + i;
787		struct gl_texture_object *tex = att->Texture;
788		if (tex) {
789			/* Render to texture. Note that a mipmapped texture need not
790			 * be complete for render to texture, so we must restrict to
791			 * mapping only the attached image.
792			 */
793			radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
794			ASSERT(att->Renderbuffer);
795
796			if (map)
797				radeon_teximage_map(image, GL_TRUE);
798			else
799				radeon_teximage_unmap(image);
800		}
801	}
802
803	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
804
805	/* depth buffer (Note wrapper!) */
806	if (ctx->DrawBuffer->_DepthBuffer)
807		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
808
809	if (ctx->DrawBuffer->_StencilBuffer)
810		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
811}
812
813static void radeonSpanRenderStart(GLcontext * ctx)
814{
815	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
816	int i;
817
818	radeon_firevertices(rmesa);
819
820	/* The locking and wait for idle should really only be needed in classic mode.
821	 * In a future memory manager based implementation, this should become
822	 * unnecessary due to the fact that mapping our buffers, textures, etc.
823	 * should implicitly wait for any previous rendering commands that must
824	 * be waited on. */
825	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
826		LOCK_HARDWARE(rmesa);
827		radeonWaitForIdleLocked(rmesa);
828	}
829
830	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
831		if (ctx->Texture.Unit[i]._ReallyEnabled)
832			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
833	}
834
835	radeon_map_unmap_buffers(ctx, 1);
836}
837
838static void radeonSpanRenderFinish(GLcontext * ctx)
839{
840	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
841	int i;
842	_swrast_flush(ctx);
843	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
844		UNLOCK_HARDWARE(rmesa);
845	}
846	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
847		if (ctx->Texture.Unit[i]._ReallyEnabled)
848			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
849	}
850
851	radeon_map_unmap_buffers(ctx, 0);
852}
853
854void radeonInitSpanFuncs(GLcontext * ctx)
855{
856	struct swrast_device_driver *swdd =
857	    _swrast_GetDeviceDriverReference(ctx);
858	swdd->SpanRenderStart = radeonSpanRenderStart;
859	swdd->SpanRenderFinish = radeonSpanRenderFinish;
860}
861
862/**
863 * Plug in the Get/Put routines for the given driRenderbuffer.
864 */
865static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
866{
867	if (rrb->base.Format == MESA_FORMAT_RGB565) {
868		radeonInitPointers_RGB565(&rrb->base);
869	} else if (rrb->base.Format == MESA_FORMAT_RGBA8888) { /* XXX */
870		radeonInitPointers_xRGB8888(&rrb->base);
871	} else if (rrb->base.Format == MESA_FORMAT_RGBA8888) {
872		radeonInitPointers_ARGB8888(&rrb->base);
873	} else if (rrb->base.Format == MESA_FORMAT_ARGB4444) {
874		radeonInitPointers_ARGB4444(&rrb->base);
875	} else if (rrb->base.Format == MESA_FORMAT_ARGB1555) {
876		radeonInitPointers_ARGB1555(&rrb->base);
877	} else if (rrb->base.Format == MESA_FORMAT_Z16) {
878		radeonInitDepthPointers_z16(&rrb->base);
879	} else if (rrb->base.Format == GL_DEPTH_COMPONENT32) { /* XXX */
880		radeonInitDepthPointers_z24(&rrb->base);
881	} else if (rrb->base.Format == MESA_FORMAT_Z24_S8) {
882		radeonInitDepthPointers_z24_s8(&rrb->base);
883	} else if (rrb->base.Format == MESA_FORMAT_S8) {
884		radeonInitStencilPointers_z24_s8(&rrb->base);
885	} else {
886		fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base.Format);
887	}
888}
889