radeon_span.c revision 22627654256ee09bfd659624568865a79eb725b6
1/**************************************************************************
2
3Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     VA Linux Systems Inc., Fremont, California.
6
7The Weather Channel (TM) funded Tungsten Graphics to develop the
8initial release of the Radeon 8500 driver under the XFree86 license.
9This notice must be preserved.
10
11All Rights Reserved.
12
13Permission is hereby granted, free of charge, to any person obtaining
14a copy of this software and associated documentation files (the
15"Software"), to deal in the Software without restriction, including
16without limitation the rights to use, copy, modify, merge, publish,
17distribute, sublicense, and/or sell copies of the Software, and to
18permit persons to whom the Software is furnished to do so, subject to
19the following conditions:
20
21The above copyright notice and this permission notice (including the
22next paragraph) shall be included in all copies or substantial
23portions of the Software.
24
25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33**************************************************************************/
34
35/*
36 * Authors:
37 *   Kevin E. Martin <martin@valinux.com>
38 *   Gareth Hughes <gareth@valinux.com>
39 *   Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43#include "main/glheader.h"
44#include "main/texformat.h"
45#include "swrast/swrast.h"
46
47#include "radeon_common.h"
48#include "radeon_lock.h"
49#include "radeon_span.h"
50
51#define DBG 0
52
53static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
54
55
56/* r200 depth buffer is always tiled - this is the formula
57   according to the docs unless I typo'ed in it
58*/
59#if defined(RADEON_R200)
60static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
61				 GLint x, GLint y)
62{
63    GLubyte *ptr = rrb->bo->ptr;
64    GLint offset;
65    if (rrb->has_surface) {
66	offset = x * rrb->cpp + y * rrb->pitch;
67    } else {
68	GLuint b;
69	offset = 0;
70	b = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
71	offset += (b >> 1) << 12;
72	offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
73	offset += ((y >> 2) & 0x3) << 9;
74	offset += ((x >> 3) & 0x1) << 8;
75	offset += ((x >> 4) & 0x3) << 6;
76	offset += ((x >> 2) & 0x1) << 5;
77	offset += ((y >> 1) & 0x1) << 4;
78	offset += ((x >> 1) & 0x1) << 3;
79	offset += (y & 0x1) << 2;
80	offset += (x & 0x1) << 1;
81    }
82    return &ptr[offset];
83}
84
85static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
86				 GLint x, GLint y)
87{
88    GLubyte *ptr = rrb->bo->ptr;
89    GLint offset;
90    if (rrb->has_surface) {
91	offset = x * rrb->cpp + y * rrb->pitch;
92    } else {
93	GLuint b;
94	offset = 0;
95	b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
96	offset += (b >> 1) << 12;
97	offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
98	offset += ((y >> 2) & 0x3) << 9;
99	offset += ((x >> 2) & 0x1) << 8;
100	offset += ((x >> 3) & 0x3) << 6;
101	offset += ((y >> 1) & 0x1) << 5;
102	offset += ((x >> 1) & 0x1) << 4;
103	offset += (y & 0x1) << 3;
104	offset += (x & 0x1) << 2;
105    }
106    return &ptr[offset];
107}
108#endif
109
110/* r600 tiling
111 * two main types:
112 * - 1D (akin to macro-linear/micro-tiled on older asics)
113 * - 2D (akin to macro-tiled/micro-tiled on older asics)
114 * only 1D tiling is implemented below
115 */
116#if defined(RADEON_R600)
117static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
118					GLint x, GLint y, GLint is_depth, GLint is_stencil)
119{
120    GLint element_bytes = rrb->cpp;
121    GLint num_samples = 1;
122    GLint tile_width = 8;
123    GLint tile_height = 8;
124    GLint tile_thickness = 1;
125    GLint pitch_elements = rrb->pitch / element_bytes;
126    GLint height = rrb->base.Height;
127    GLint z = 0;
128    GLint sample_number = 0;
129    /* */
130    GLint tile_bytes;
131    GLint tiles_per_row;
132    GLint tiles_per_slice;
133    GLint slice_offset;
134    GLint tile_row_index;
135    GLint tile_column_index;
136    GLint tile_offset;
137    GLint pixel_number = 0;
138    GLint element_offset;
139    GLint offset = 0;
140
141    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
142    tiles_per_row = pitch_elements / tile_width;
143    tiles_per_slice = tiles_per_row * (height / tile_height);
144    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
145    tile_row_index = y / tile_height;
146    tile_column_index = x / tile_width;
147    tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
148
149    if (is_depth) {
150	    GLint pixel_offset = 0;
151
152	    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
153	    pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
154	    pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
155	    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
156	    pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
157	    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
158	    switch (element_bytes) {
159	    case 2:
160		    pixel_offset = pixel_number * element_bytes * num_samples;
161		    break;
162	    case 4:
163		    /* stencil and depth data are stored separately within a tile.
164		     * stencil is stored in a contiguous tile before the depth tile.
165		     * stencil element is 1 byte, depth element is 3 bytes.
166		     * stencil tile is 64 bytes.
167		     */
168		    if (is_stencil)
169			    pixel_offset = pixel_number * 1 * num_samples;
170		    else
171			    pixel_offset = (pixel_number * 3 * num_samples) + 64;
172		    break;
173	    }
174	    element_offset = pixel_offset + (sample_number * element_bytes);
175    } else {
176	    GLint sample_offset;
177
178	    switch (element_bytes) {
179	    case 1:
180		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
181		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
182		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
183		    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
184		    pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
185		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
186		    break;
187	    case 2:
188		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
189		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
190		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
191		    pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
192		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
193		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
194		    break;
195	    case 4:
196		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
197		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
198		    pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
199		    pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
200		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
201		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
202		    break;
203	    }
204	    sample_offset = sample_number * (tile_bytes / num_samples);
205	    element_offset = sample_offset + (pixel_number * element_bytes);
206    }
207    offset = slice_offset + tile_offset + element_offset;
208    return offset;
209}
210
211/* depth buffers */
212static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
213			       GLint x, GLint y)
214{
215    GLubyte *ptr = rrb->bo->ptr;
216    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
217    return &ptr[offset];
218}
219
220static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
221				 GLint x, GLint y)
222{
223    GLubyte *ptr = rrb->bo->ptr;
224    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
225    return &ptr[offset];
226}
227
228static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
229			       GLint x, GLint y)
230{
231    GLubyte *ptr = rrb->bo->ptr;
232    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
233    GLint offset;
234
235    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
236        offset = x * rrb->cpp + y * rrb->pitch;
237    } else {
238	    offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
239    }
240    return &ptr[offset];
241}
242
243#else
244
245/* radeon tiling on r300-r500 has 4 states,
246   macro-linear/micro-linear
247   macro-linear/micro-tiled
248   macro-tiled /micro-linear
249   macro-tiled /micro-tiled
250   1 byte surface
251   2 byte surface - two types - we only provide 8x2 microtiling
252   4 byte surface
253   8/16 byte (unused)
254*/
255static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
256			     GLint x, GLint y)
257{
258    GLubyte *ptr = rrb->bo->ptr;
259    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
260    GLint offset;
261
262    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
263        offset = x * rrb->cpp + y * rrb->pitch;
264    } else {
265        offset = 0;
266        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
267	    if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
268		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
269		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
270		offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
271		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
272		offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
273		offset += ((y >> 1) & 0x1) << 6;
274		offset += ((x >> 2) & 0x1) << 5;
275		offset += (y & 1) << 4;
276		offset += (x & 3) << 2;
277            } else {
278		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
279		offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
280		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
281		offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
282		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
283		offset += (y & 1) << 6;
284		offset += (x & 15) << 2;
285            }
286        } else {
287	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
288	    offset += (y & 1) << 4;
289	    offset += (x & 3) << 2;
290        }
291    }
292    return &ptr[offset];
293}
294
295static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
296				     GLint x, GLint y)
297{
298    GLubyte *ptr = rrb->bo->ptr;
299    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
300    GLint offset;
301
302    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
303        offset = x * rrb->cpp + y * rrb->pitch;
304    } else {
305        offset = 0;
306        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
307            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
308		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
309		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
310		offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
311		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
312		offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
313		offset += ((y >> 1) & 0x1) << 6;
314		offset += ((x >> 3) & 0x1) << 5;
315		offset += (y & 1) << 4;
316		offset += (x & 3) << 2;
317            } else {
318		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
319		offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
320		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
321		offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
322		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
323		offset += (y & 1) << 6;
324		offset += ((x >> 4) & 0x1) << 5;
325                offset += (x & 15) << 2;
326            }
327        } else {
328	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
329	    offset += (y & 0x1) << 4;
330	    offset += (x & 0x7) << 1;
331        }
332    }
333    return &ptr[offset];
334}
335
336#endif
337
338/*
339 * Note that all information needed to access pixels in a renderbuffer
340 * should be obtained through the gl_renderbuffer parameter, not per-context
341 * information.
342 */
/* Declarations common to every span function body: the radeon context, the
 * radeon renderbuffer behind rb, the Y scale/bias (rows are flipped when
 * ctx->DrawBuffer->Name is zero) and the cliprect list with its offsets.
 */
#define RADEON_SPAN_COMMON_DECLS					\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;			\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;

/* Statement that fills in the cliprect variables declared above. */
#define RADEON_SPAN_GET_CLIPRECTS					\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* Color spans additionally get a scratch pixel value p. */
#define LOCAL_VARS							\
   RADEON_SPAN_COMMON_DECLS						\
   GLuint p;								\
   (void)p;								\
   RADEON_SPAN_GET_CLIPRECTS

#define LOCAL_DEPTH_VARS						\
   RADEON_SPAN_COMMON_DECLS						\
   RADEON_SPAN_GET_CLIPRECTS

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

#define Y_FLIP(_y) ((_y) * yScale + yBias)

/* No locking is done per span function. */
#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 *
 * The macro deliberately leaves the "do {" and the loop body open.
 */
#define HW_CLIPLOOP()							\
   do {									\
      int _nc;								\
      for (_nc = num_cliprects; _nc--; ) {				\
	 int minx = cliprects[_nc].x1 - x_off;				\
	 int miny = cliprects[_nc].y1 - y_off;				\
	 int maxx = cliprects[_nc].x2 - x_off;				\
	 int maxy = cliprects[_nc].y2 - y_off;
385
386/* ================================================================
387 * Color buffer
388 */
389
390/* 16 bit, RGB565 color spanline and pixel functions
391 */
392#define SPANTMP_PIXEL_FMT GL_RGB
393#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
394
395#define TAG(x)    radeon##x##_RGB565
396#define TAG2(x,y) radeon##x##_RGB565##y
397#if defined(RADEON_R600)
398#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
399#else
400#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
401#endif
402#include "spantmp2.h"
403
404#define SPANTMP_PIXEL_FMT GL_RGB
405#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV
406
407#define TAG(x)    radeon##x##_RGB565_REV
408#define TAG2(x,y) radeon##x##_RGB565_REV##y
409#if defined(RADEON_R600)
410#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
411#else
412#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
413#endif
414#include "spantmp2.h"
415
416/* 16 bit, ARGB1555 color spanline and pixel functions
417 */
418#define SPANTMP_PIXEL_FMT GL_BGRA
419#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
420
421#define TAG(x)    radeon##x##_ARGB1555
422#define TAG2(x,y) radeon##x##_ARGB1555##y
423#if defined(RADEON_R600)
424#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
425#else
426#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
427#endif
428#include "spantmp2.h"
429
430#define SPANTMP_PIXEL_FMT GL_BGRA
431#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5
432
433#define TAG(x)    radeon##x##_ARGB1555_REV
434#define TAG2(x,y) radeon##x##_ARGB1555_REV##y
435#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
436#include "spantmp2.h"
437
438/* 16 bit, RGBA4 color spanline and pixel functions
439 */
440#define SPANTMP_PIXEL_FMT GL_BGRA
441#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
442
443#define TAG(x)    radeon##x##_ARGB4444
444#define TAG2(x,y) radeon##x##_ARGB4444##y
445#if defined(RADEON_R600)
446#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
447#else
448#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
449#endif
450#include "spantmp2.h"
451
452#define SPANTMP_PIXEL_FMT GL_BGRA
453#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4
454
455#define TAG(x)    radeon##x##_ARGB4444_REV
456#define TAG2(x,y) radeon##x##_ARGB4444_REV##y
457#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
458#include "spantmp2.h"
459
460/* 32 bit, xRGB8888 color spanline and pixel functions
461 */
462#define SPANTMP_PIXEL_FMT GL_BGRA
463#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
464
465#define TAG(x)    radeon##x##_xRGB8888
466#define TAG2(x,y) radeon##x##_xRGB8888##y
467#if defined(RADEON_R600)
468#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
469#define PUT_VALUE(_x, _y, d) { \
470   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
471   *_ptr = d;								\
472} while (0)
473#else
474#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
475#define PUT_VALUE(_x, _y, d) { \
476   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
477   *_ptr = d;								\
478} while (0)
479#endif
480#include "spantmp2.h"
481
482/* 32 bit, ARGB8888 color spanline and pixel functions
483 */
484#define SPANTMP_PIXEL_FMT GL_BGRA
485#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
486
487#define TAG(x)    radeon##x##_ARGB8888
488#define TAG2(x,y) radeon##x##_ARGB8888##y
489#if defined(RADEON_R600)
490#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
491#define PUT_VALUE(_x, _y, d) { \
492   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
493   *_ptr = d;								\
494} while (0)
495#else
496#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
497#define PUT_VALUE(_x, _y, d) { \
498   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
499   *_ptr = d;								\
500} while (0)
501#endif
502#include "spantmp2.h"
503
504/* 32 bit, BGRx8888 color spanline and pixel functions
505 */
506#define SPANTMP_PIXEL_FMT GL_BGRA
507#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
508
509#define TAG(x)    radeon##x##_BGRx8888
510#define TAG2(x,y) radeon##x##_BGRx8888##y
511#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0x000000ff))
512#define PUT_VALUE(_x, _y, d) { \
513   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
514   *_ptr = d;								\
515} while (0)
516#include "spantmp2.h"
517
518/* 32 bit, BGRA8888 color spanline and pixel functions
519 */
520#define SPANTMP_PIXEL_FMT GL_BGRA
521#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8
522
523#define TAG(x)    radeon##x##_BGRA8888
524#define TAG2(x,y) radeon##x##_BGRA8888##y
525#define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off)
526#include "spantmp2.h"
527
528/* ================================================================
529 * Depth buffer
530 */
531
532/* The Radeon family has depth tiling on all the time, so we have to convert
533 * the x,y coordinates into the memory bus address (mba) in the same
534 * manner as the engine.  In each case, the linear block address (ba)
535 * is calculated, and then wired with x and y to produce the final
536 * memory address.
537 * The chip will do address translation on its own if the surface registers
538 * are set up correctly. It is not quite enough to get it working with hyperz
539 * too...
540 */
541
542/* 16-bit depth buffer functions
543 */
544#define VALUE_TYPE GLushort
545
546#if defined(RADEON_R200)
547#define WRITE_DEPTH( _x, _y, d )					\
548   *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
549#elif defined(RADEON_R600)
550#define WRITE_DEPTH( _x, _y, d )					\
551   *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
552#else
553#define WRITE_DEPTH( _x, _y, d )					\
554   *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
555#endif
556
557#if defined(RADEON_R200)
558#define READ_DEPTH( d, _x, _y )						\
559   d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
560#elif defined(RADEON_R600)
561#define READ_DEPTH( d, _x, _y )						\
562   d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
563#else
564#define READ_DEPTH( d, _x, _y )						\
565   d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
566#endif
567
568#define TAG(x) radeon##x##_z16
569#include "depthtmp.h"
570
571/* 24 bit depth
572 *
573 * Careful: It looks like the R300 uses ZZZS byte order while the R200
574 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
575 */
576#define VALUE_TYPE GLuint
577
578#if defined(RADEON_R300)
579#define WRITE_DEPTH( _x, _y, d )					\
580do {									\
581   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
582   GLuint tmp = *_ptr;				\
583   tmp &= 0x000000ff;							\
584   tmp |= ((d << 8) & 0xffffff00);					\
585   *_ptr = tmp;					\
586} while (0)
587#elif defined(RADEON_R600)
588#define WRITE_DEPTH( _x, _y, d )					\
589do {									\
590   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
591   GLuint tmp = *_ptr;				\
592   tmp &= 0xff000000;							\
593   tmp |= ((d) & 0x00ffffff);					\
594   *_ptr = tmp;					\
595} while (0)
596#elif defined(RADEON_R200)
597#define WRITE_DEPTH( _x, _y, d )					\
598do {									\
599   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
600   GLuint tmp = *_ptr;				\
601   tmp &= 0xff000000;							\
602   tmp |= ((d) & 0x00ffffff);						\
603   *_ptr = tmp;					\
604} while (0)
605#else
606#define WRITE_DEPTH( _x, _y, d )					\
607do {									\
608   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
609   GLuint tmp = *_ptr;							\
610   tmp &= 0xff000000;							\
611   tmp |= ((d) & 0x00ffffff);						\
612   *_ptr = tmp;					\
613} while (0)
614#endif
615
616#if defined(RADEON_R300)
617#define READ_DEPTH( d, _x, _y )						\
618  do {									\
619    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
620  }while(0)
621#elif defined(RADEON_R600)
622#define READ_DEPTH( d, _x, _y )						\
623  do {									\
624    d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
625  }while(0)
626#elif defined(RADEON_R200)
627#define READ_DEPTH( d, _x, _y )						\
628  do {									\
629    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
630  }while(0)
631#else
632#define READ_DEPTH( d, _x, _y )	\
633  d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off)) & 0x00ffffff;
634#endif
635
636#define TAG(x) radeon##x##_z24
637#include "depthtmp.h"
638
639/* 24 bit depth, 8 bit stencil depthbuffer functions
640 * EXT_depth_stencil
641 *
642 * Careful: It looks like the R300 uses ZZZS byte order while the R200
643 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
644 */
645#define VALUE_TYPE GLuint
646
647#if defined(RADEON_R300)
648#define WRITE_DEPTH( _x, _y, d )					\
649do {									\
650   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
651   *_ptr = d;								\
652} while (0)
653#elif defined(RADEON_R600)
654#define WRITE_DEPTH( _x, _y, d )					\
655do {									\
656   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
657   GLuint tmp = *_ptr;				\
658   tmp &= 0xff000000;							\
659   tmp |= ((d) & 0x00ffffff);					\
660   *_ptr = tmp;					\
661   _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
662   tmp = *_ptr;				\
663   tmp &= 0xffffff00;							\
664   tmp |= ((d) >> 24) & 0xff;						\
665   *_ptr = tmp;					\
666} while (0)
667#elif defined(RADEON_R200)
668#define WRITE_DEPTH( _x, _y, d )					\
669do {									\
670   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
671   *_ptr = d;								\
672} while (0)
673#else
674#define WRITE_DEPTH( _x, _y, d )					\
675do {									\
676   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
677   *_ptr = d;					\
678} while (0)
679#endif
680
681#if defined(RADEON_R300)
682#define READ_DEPTH( d, _x, _y )						\
683  do { \
684    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));	\
685  }while(0)
686#elif defined(RADEON_R600)
687#define READ_DEPTH( d, _x, _y )						\
688  do { \
689    d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \
690    d |= ((*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) << 24) & 0xff000000; \
691  }while(0)
692#elif defined(RADEON_R200)
693#define READ_DEPTH( d, _x, _y )						\
694  do { \
695    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off));	\
696  }while(0)
697#else
698#define READ_DEPTH( d, _x, _y )	do {					\
699    d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off )); \
700  } while (0)
701#endif
702
703#define TAG(x) radeon##x##_s8_z24
704#include "depthtmp.h"
705
706/* ================================================================
707 * Stencil buffer
708 */
709
710/* 24 bit depth, 8 bit stencil depthbuffer functions
711 */
712#ifdef RADEON_R300
713#define WRITE_STENCIL( _x, _y, d )					\
714do {									\
715   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
716   GLuint tmp = *_ptr;				\
717   tmp &= 0xffffff00;							\
718   tmp |= (d) & 0xff;							\
719   *_ptr = tmp;					\
720} while (0)
721#elif defined(RADEON_R600)
722#define WRITE_STENCIL( _x, _y, d )					\
723do {									\
724   GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
725   GLuint tmp = *_ptr;				\
726   tmp &= 0xffffff00;							\
727   tmp |= (d) & 0xff;							\
728   *_ptr = tmp;					\
729} while (0)
730#elif defined(RADEON_R200)
731#define WRITE_STENCIL( _x, _y, d )					\
732do {									\
733   GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);		\
734   GLuint tmp = *_ptr;				\
735   tmp &= 0x00ffffff;							\
736   tmp |= (((d) & 0xff) << 24);						\
737   *_ptr = tmp;					\
738} while (0)
739#else
740#define WRITE_STENCIL( _x, _y, d )					\
741do {									\
742   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
743   GLuint tmp = *_ptr;				\
744   tmp &= 0x00ffffff;							\
745   tmp |= (((d) & 0xff) << 24);						\
746   *_ptr = tmp;					\
747} while (0)
748#endif
749
750#ifdef RADEON_R300
751#define READ_STENCIL( d, _x, _y )					\
752do {									\
753   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
754   GLuint tmp = *_ptr;				\
755   d = tmp & 0x000000ff;						\
756} while (0)
757#elif defined(RADEON_R600)
758#define READ_STENCIL( d, _x, _y )					\
759do {									\
760   GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off );		\
761   GLuint tmp = *_ptr;				\
762   d = tmp & 0x000000ff;						\
763} while (0)
764#elif defined(RADEON_R200)
765#define READ_STENCIL( d, _x, _y )					\
766do {									\
767   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
768   GLuint tmp = *_ptr;				\
769   d = (tmp & 0xff000000) >> 24;					\
770} while (0)
771#else
772#define READ_STENCIL( d, _x, _y )					\
773do {									\
774   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
775   GLuint tmp = *_ptr;				\
776   d = (tmp & 0xff000000) >> 24;					\
777} while (0)
778#endif
779
780#define TAG(x) radeon##x##_s8_z24
781#include "stenciltmp.h"
782
783
784static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
785{
786	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
787	int r;
788
789	if (rrb == NULL || !rrb->bo)
790		return;
791
792	if (flag) {
793		if (rrb->bo->bom->funcs->bo_wait)
794			radeon_bo_wait(rrb->bo);
795		r = radeon_bo_map(rrb->bo, 1);
796		if (r) {
797			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
798				__FUNCTION__, r);
799		}
800
801		radeonSetSpanFunctions(rrb);
802	} else {
803		radeon_bo_unmap(rrb->bo);
804		rb->GetRow = NULL;
805		rb->PutRow = NULL;
806	}
807}
808
809static void
810radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
811{
812	GLuint i, j;
813
814	/* color draw buffers */
815	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
816		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
817
818	/* check for render to textures */
819	for (i = 0; i < BUFFER_COUNT; i++) {
820		struct gl_renderbuffer_attachment *att =
821			ctx->DrawBuffer->Attachment + i;
822		struct gl_texture_object *tex = att->Texture;
823		if (tex) {
824			/* Render to texture. Note that a mipmapped texture need not
825			 * be complete for render to texture, so we must restrict to
826			 * mapping only the attached image.
827			 */
828			radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
829			ASSERT(att->Renderbuffer);
830
831			if (map)
832				radeon_teximage_map(image, GL_TRUE);
833			else
834				radeon_teximage_unmap(image);
835		}
836	}
837
838	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
839
840	/* depth buffer (Note wrapper!) */
841	if (ctx->DrawBuffer->_DepthBuffer)
842		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
843
844	if (ctx->DrawBuffer->_StencilBuffer)
845		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
846}
847
848static void radeonSpanRenderStart(GLcontext * ctx)
849{
850	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
851	int i;
852
853	radeon_firevertices(rmesa);
854
855	/* The locking and wait for idle should really only be needed in classic mode.
856	 * In a future memory manager based implementation, this should become
857	 * unnecessary due to the fact that mapping our buffers, textures, etc.
858	 * should implicitly wait for any previous rendering commands that must
859	 * be waited on. */
860	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
861		LOCK_HARDWARE(rmesa);
862		radeonWaitForIdleLocked(rmesa);
863	}
864
865	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
866		if (ctx->Texture.Unit[i]._ReallyEnabled)
867			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
868	}
869
870	radeon_map_unmap_buffers(ctx, 1);
871}
872
873static void radeonSpanRenderFinish(GLcontext * ctx)
874{
875	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
876	int i;
877	_swrast_flush(ctx);
878	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
879		UNLOCK_HARDWARE(rmesa);
880	}
881	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
882		if (ctx->Texture.Unit[i]._ReallyEnabled)
883			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
884	}
885
886	radeon_map_unmap_buffers(ctx, 0);
887}
888
889void radeonInitSpanFuncs(GLcontext * ctx)
890{
891	struct swrast_device_driver *swdd =
892	    _swrast_GetDeviceDriverReference(ctx);
893	swdd->SpanRenderStart = radeonSpanRenderStart;
894	swdd->SpanRenderFinish = radeonSpanRenderFinish;
895}
896
897/**
898 * Plug in the Get/Put routines for the given driRenderbuffer.
899 */
900static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
901{
902	if (rrb->base.Format == MESA_FORMAT_RGB565) {
903		radeonInitPointers_RGB565(&rrb->base);
904	} else if (rrb->base.Format == MESA_FORMAT_RGB565_REV) {
905		radeonInitPointers_RGB565_REV(&rrb->base);
906	} else if (rrb->base.Format == MESA_FORMAT_XRGB8888) {
907		radeonInitPointers_xRGB8888(&rrb->base);
908        } else if (rrb->base.Format == MESA_FORMAT_XRGB8888_REV) {
909		radeonInitPointers_BGRx8888(&rrb->base);
910	} else if (rrb->base.Format == MESA_FORMAT_ARGB8888) {
911		radeonInitPointers_ARGB8888(&rrb->base);
912        } else if (rrb->base.Format == MESA_FORMAT_ARGB8888_REV) {
913		radeonInitPointers_BGRA8888(&rrb->base);
914	} else if (rrb->base.Format == MESA_FORMAT_ARGB4444) {
915		radeonInitPointers_ARGB4444(&rrb->base);
916	} else if (rrb->base.Format == MESA_FORMAT_ARGB4444_REV) {
917		radeonInitPointers_ARGB4444_REV(&rrb->base);
918	} else if (rrb->base.Format == MESA_FORMAT_ARGB1555) {
919		radeonInitPointers_ARGB1555(&rrb->base);
920	} else if (rrb->base.Format == MESA_FORMAT_ARGB1555_REV) {
921		radeonInitPointers_ARGB1555_REV(&rrb->base);
922	} else if (rrb->base.Format == MESA_FORMAT_Z16) {
923		radeonInitDepthPointers_z16(&rrb->base);
924	} else if (rrb->base.Format == MESA_FORMAT_X8_Z24) {
925		radeonInitDepthPointers_z24(&rrb->base);
926	} else if (rrb->base.Format == MESA_FORMAT_S8_Z24) {
927		radeonInitDepthPointers_s8_z24(&rrb->base);
928	} else if (rrb->base.Format == MESA_FORMAT_S8) {
929		radeonInitStencilPointers_s8_z24(&rrb->base);
930	} else {
931		fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format);
932	}
933}
934