radeon_span.c revision 6e1ddd34c6b6f9773ef87198503f5f61f9a6c23a
1/**************************************************************************
2
3Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     VA Linux Systems Inc., Fremont, California.
6
7The Weather Channel (TM) funded Tungsten Graphics to develop the
8initial release of the Radeon 8500 driver under the XFree86 license.
9This notice must be preserved.
10
11All Rights Reserved.
12
13Permission is hereby granted, free of charge, to any person obtaining
14a copy of this software and associated documentation files (the
15"Software"), to deal in the Software without restriction, including
16without limitation the rights to use, copy, modify, merge, publish,
17distribute, sublicense, and/or sell copies of the Software, and to
18permit persons to whom the Software is furnished to do so, subject to
19the following conditions:
20
21The above copyright notice and this permission notice (including the
22next paragraph) shall be included in all copies or substantial
23portions of the Software.
24
25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33**************************************************************************/
34
35/*
36 * Authors:
37 *   Kevin E. Martin <martin@valinux.com>
38 *   Gareth Hughes <gareth@valinux.com>
39 *   Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43#include "main/glheader.h"
44#include "swrast/swrast.h"
45
46#include "radeon_common.h"
47#include "radeon_lock.h"
48#include "radeon_span.h"
49
50#define DBG 0
51
52static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
53
54
55/* r200 depth buffer is always tiled - this is the formula
56   according to the docs unless I typo'ed in it
57*/
58#if defined(RADEON_R200)
59static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
60				 GLint x, GLint y)
61{
62    GLubyte *ptr = rrb->bo->ptr;
63    GLint offset;
64    if (rrb->has_surface) {
65	offset = x * rrb->cpp + y * rrb->pitch;
66    } else {
67	GLuint b;
68	offset = 0;
69	b = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
70	offset += (b >> 1) << 12;
71	offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
72	offset += ((y >> 2) & 0x3) << 9;
73	offset += ((x >> 3) & 0x1) << 8;
74	offset += ((x >> 4) & 0x3) << 6;
75	offset += ((x >> 2) & 0x1) << 5;
76	offset += ((y >> 1) & 0x1) << 4;
77	offset += ((x >> 1) & 0x1) << 3;
78	offset += (y & 0x1) << 2;
79	offset += (x & 0x1) << 1;
80    }
81    return &ptr[offset];
82}
83
84static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
85				 GLint x, GLint y)
86{
87    GLubyte *ptr = rrb->bo->ptr;
88    GLint offset;
89    if (rrb->has_surface) {
90	offset = x * rrb->cpp + y * rrb->pitch;
91    } else {
92	GLuint b;
93	offset = 0;
94	b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
95	offset += (b >> 1) << 12;
96	offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
97	offset += ((y >> 2) & 0x3) << 9;
98	offset += ((x >> 2) & 0x1) << 8;
99	offset += ((x >> 3) & 0x3) << 6;
100	offset += ((y >> 1) & 0x1) << 5;
101	offset += ((x >> 1) & 0x1) << 4;
102	offset += (y & 0x1) << 3;
103	offset += (x & 0x1) << 2;
104    }
105    return &ptr[offset];
106}
107#endif
108
109/* r600 tiling
110 * two main types:
111 * - 1D (akin to macro-linear/micro-tiled on older asics)
112 * - 2D (akin to macro-tiled/micro-tiled on older asics)
113 * only 1D tiling is implemented below
114 */
115#if defined(RADEON_R600)
116static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
117					GLint x, GLint y, GLint is_depth, GLint is_stencil)
118{
119    GLint element_bytes = rrb->cpp;
120    GLint num_samples = 1;
121    GLint tile_width = 8;
122    GLint tile_height = 8;
123    GLint tile_thickness = 1;
124    GLint pitch_elements = rrb->pitch / element_bytes;
125    GLint height = rrb->base.Height;
126    GLint z = 0;
127    GLint sample_number = 0;
128    /* */
129    GLint tile_bytes;
130    GLint tiles_per_row;
131    GLint tiles_per_slice;
132    GLint slice_offset;
133    GLint tile_row_index;
134    GLint tile_column_index;
135    GLint tile_offset;
136    GLint pixel_number = 0;
137    GLint element_offset;
138    GLint offset = 0;
139
140    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
141    tiles_per_row = pitch_elements / tile_width;
142    tiles_per_slice = tiles_per_row * (height / tile_height);
143    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
144    tile_row_index = y / tile_height;
145    tile_column_index = x / tile_width;
146    tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
147
148    if (is_depth) {
149	    GLint pixel_offset = 0;
150
151	    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
152	    pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
153	    pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
154	    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
155	    pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
156	    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
157	    switch (element_bytes) {
158	    case 2:
159		    pixel_offset = pixel_number * element_bytes * num_samples;
160		    break;
161	    case 4:
162		    /* stencil and depth data are stored separately within a tile.
163		     * stencil is stored in a contiguous tile before the depth tile.
164		     * stencil element is 1 byte, depth element is 3 bytes.
165		     * stencil tile is 64 bytes.
166		     */
167		    if (is_stencil)
168			    pixel_offset = pixel_number * 1 * num_samples;
169		    else
170			    pixel_offset = (pixel_number * 3 * num_samples) + 64;
171		    break;
172	    }
173	    element_offset = pixel_offset + (sample_number * element_bytes);
174    } else {
175	    GLint sample_offset;
176
177	    switch (element_bytes) {
178	    case 1:
179		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
180		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
181		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
182		    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
183		    pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
184		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
185		    break;
186	    case 2:
187		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
188		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
189		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
190		    pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
191		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
192		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
193		    break;
194	    case 4:
195		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
196		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
197		    pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
198		    pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
199		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
200		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
201		    break;
202	    }
203	    sample_offset = sample_number * (tile_bytes / num_samples);
204	    element_offset = sample_offset + (pixel_number * element_bytes);
205    }
206    offset = slice_offset + tile_offset + element_offset;
207    return offset;
208}
209
210/* depth buffers */
211static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
212			       GLint x, GLint y)
213{
214    GLubyte *ptr = rrb->bo->ptr;
215    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
216    return &ptr[offset];
217}
218
219static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
220				 GLint x, GLint y)
221{
222    GLubyte *ptr = rrb->bo->ptr;
223    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
224    return &ptr[offset];
225}
226
227static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
228			       GLint x, GLint y)
229{
230    GLubyte *ptr = rrb->bo->ptr;
231    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
232    GLint offset;
233
234    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
235        offset = x * rrb->cpp + y * rrb->pitch;
236    } else {
237	    offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
238    }
239    return &ptr[offset];
240}
241
242#else
243
244/* radeon tiling on r300-r500 has 4 states,
245   macro-linear/micro-linear
246   macro-linear/micro-tiled
247   macro-tiled /micro-linear
248   macro-tiled /micro-tiled
249   1 byte surface
250   2 byte surface - two types - we only provide 8x2 microtiling
251   4 byte surface
252   8/16 byte (unused)
253*/
254static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
255			     GLint x, GLint y)
256{
257    GLubyte *ptr = rrb->bo->ptr;
258    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
259    GLint offset;
260
261    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
262        offset = x * rrb->cpp + y * rrb->pitch;
263    } else {
264        offset = 0;
265        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
266	    if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
267		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
268		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
269		offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
270		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
271		offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
272		offset += ((y >> 1) & 0x1) << 6;
273		offset += ((x >> 2) & 0x1) << 5;
274		offset += (y & 1) << 4;
275		offset += (x & 3) << 2;
276            } else {
277		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
278		offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
279		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
280		offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
281		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
282		offset += (y & 1) << 6;
283		offset += (x & 15) << 2;
284            }
285        } else {
286	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
287	    offset += (y & 1) << 4;
288	    offset += (x & 3) << 2;
289        }
290    }
291    return &ptr[offset];
292}
293
294static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
295				     GLint x, GLint y)
296{
297    GLubyte *ptr = rrb->bo->ptr;
298    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
299    GLint offset;
300
301    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
302        offset = x * rrb->cpp + y * rrb->pitch;
303    } else {
304        offset = 0;
305        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
306            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
307		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
308		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
309		offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
310		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
311		offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
312		offset += ((y >> 1) & 0x1) << 6;
313		offset += ((x >> 3) & 0x1) << 5;
314		offset += (y & 1) << 4;
315		offset += (x & 3) << 2;
316            } else {
317		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
318		offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
319		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
320		offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
321		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
322		offset += (y & 1) << 6;
323		offset += ((x >> 4) & 0x1) << 5;
324                offset += (x & 15) << 2;
325            }
326        } else {
327	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
328	    offset += (y & 0x1) << 4;
329	    offset += (x & 0x7) << 1;
330        }
331    }
332    return &ptr[offset];
333}
334
335#endif
336
337/*
338 * Note that all information needed to access pixels in a renderbuffer
339 * should be obtained through the gl_renderbuffer parameter, not per-context
340 * information.
341 */
/*
 * Locals declared at the top of every generated color span function.
 * Expects `ctx` and `rb` to be in scope at the expansion site.
 *
 *  radeon, rrb      - driver context and driver renderbuffer
 *  yScale, yBias    - used by Y_FLIP: identity when ctx->DrawBuffer->Name
 *                     is non-zero, otherwise y -> Height - 1 - y
 *  cliprects, num_cliprects, x_off, y_off
 *                   - filled in by radeon_get_cliprects()
 *  p                - scratch value for the template; the (void) cast
 *                     silences unused-variable warnings
 */
#define LOCAL_VARS						\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;		\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
   GLuint p;						\
   (void)p;						\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* Same as LOCAL_VARS minus the scratch variable `p`; used by the depth
 * templates. */
#define LOCAL_DEPTH_VARS				\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;	\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;							\
  radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* The stencil templates need exactly the depth locals. */
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Map a GL row to a buffer row using the yScale/yBias locals above. */
#define Y_FLIP(_y) ((_y) * yScale + yBias)

/* Both lock hooks expand to nothing in this file. */
#define HW_LOCK()

#define HW_UNLOCK()
371
/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 *
 * Iterates over the cliprects from last to first and exposes the current
 * rectangle, translated by (x_off, y_off), as minx/miny/maxx/maxy.
 * The macro intentionally leaves the `do {` and `while {` blocks open;
 * they must be closed by a matching end-of-loop macro supplied elsewhere
 * (not visible in this file - presumably by the span template headers).
 */
#define HW_CLIPLOOP()							\
   do {									\
      int _nc = num_cliprects;						\
      while ( _nc-- ) {							\
	 int minx = cliprects[_nc].x1 - x_off;				\
	 int miny = cliprects[_nc].y1 - y_off;				\
	 int maxx = cliprects[_nc].x2 - x_off;				\
	 int maxy = cliprects[_nc].y2 - y_off;
384
385/* ================================================================
386 * Color buffer
387 */
388
/* 16 bit, RGB565 color spanline and pixel functions
 *
 * Instantiates spantmp2.h for GL_RGB / GL_UNSIGNED_SHORT_5_6_5.  TAG and
 * TAG2 append _RGB565 to every generated symbol (radeonSetSpanFunctions()
 * in this file calls the resulting radeonInitPointers_RGB565).
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
/* GET_PTR yields the byte address of pixel (X,Y) after adding the
 * x_off/y_off locals declared by LOCAL_VARS. */
#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"
402
/* 16 bit, ARGB1555 color spanline and pixel functions
 *
 * Instantiates spantmp2.h for GL_BGRA / GL_UNSIGNED_SHORT_1_5_5_5_REV.
 * TAG and TAG2 append _ARGB1555 to every generated symbol
 * (radeonSetSpanFunctions() in this file calls the resulting
 * radeonInitPointers_ARGB1555).
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV

#define TAG(x)    radeon##x##_ARGB1555
#define TAG2(x,y) radeon##x##_ARGB1555##y
/* GET_PTR yields the byte address of pixel (X,Y) after adding the
 * x_off/y_off locals declared by LOCAL_VARS. */
#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"
416
/* 16 bit, ARGB4444 color spanline and pixel functions
 *
 * Instantiates spantmp2.h for GL_BGRA / GL_UNSIGNED_SHORT_4_4_4_4_REV.
 * TAG and TAG2 append _ARGB4444 to every generated symbol
 * (radeonSetSpanFunctions() in this file calls the resulting
 * radeonInitPointers_ARGB4444).
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV

#define TAG(x)    radeon##x##_ARGB4444
#define TAG2(x,y) radeon##x##_ARGB4444##y
/* GET_PTR yields the byte address of pixel (X,Y) after adding the
 * x_off/y_off locals declared by LOCAL_VARS. */
#if defined(RADEON_R600)
#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
#else
#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
#endif
#include "spantmp2.h"
430
431/* 32 bit, xRGB8888 color spanline and pixel functions
432 */
433#define SPANTMP_PIXEL_FMT GL_BGRA
434#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
435
436#define TAG(x)    radeon##x##_xRGB8888
437#define TAG2(x,y) radeon##x##_xRGB8888##y
438#if defined(RADEON_R600)
439#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
440#define PUT_VALUE(_x, _y, d) { \
441   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
442   *_ptr = d;								\
443} while (0)
444#else
445#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
446#define PUT_VALUE(_x, _y, d) { \
447   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
448   *_ptr = d;								\
449} while (0)
450#endif
451#include "spantmp2.h"
452
453/* 32 bit, ARGB8888 color spanline and pixel functions
454 */
455#define SPANTMP_PIXEL_FMT GL_BGRA
456#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
457
458#define TAG(x)    radeon##x##_ARGB8888
459#define TAG2(x,y) radeon##x##_ARGB8888##y
460#if defined(RADEON_R600)
461#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
462#define PUT_VALUE(_x, _y, d) { \
463   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
464   *_ptr = d;								\
465} while (0)
466#else
467#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
468#define PUT_VALUE(_x, _y, d) { \
469   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
470   *_ptr = d;								\
471} while (0)
472#endif
473#include "spantmp2.h"
474
475/* ================================================================
476 * Depth buffer
477 */
478
479/* The Radeon family has depth tiling on all the time, so we have to convert
480 * the x,y coordinates into the memory bus address (mba) in the same
481 * manner as the engine.  In each case, the linear block address (ba)
482 * is calculated, and then wired with x and y to produce the final
483 * memory address.
484 * The chip will do address translation on its own if the surface registers
485 * are set up correctly. It is not quite enough to get it working with hyperz
486 * too...
487 */
488
489/* 16-bit depth buffer functions
490 */
491#define VALUE_TYPE GLushort
492
493#if defined(RADEON_R200)
494#define WRITE_DEPTH( _x, _y, d )					\
495   *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
496#elif defined(RADEON_R600)
497#define WRITE_DEPTH( _x, _y, d )					\
498   *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
499#else
500#define WRITE_DEPTH( _x, _y, d )					\
501   *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
502#endif
503
504#if defined(RADEON_R200)
505#define READ_DEPTH( d, _x, _y )						\
506   d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
507#elif defined(RADEON_R600)
508#define READ_DEPTH( d, _x, _y )						\
509   d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
510#else
511#define READ_DEPTH( d, _x, _y )						\
512   d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
513#endif
514
515#define TAG(x) radeon##x##_z16
516#include "depthtmp.h"
517
518/* 24 bit depth
519 *
520 * Careful: It looks like the R300 uses ZZZS byte order while the R200
521 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
522 */
523#define VALUE_TYPE GLuint
524
525#if defined(RADEON_R300)
526#define WRITE_DEPTH( _x, _y, d )					\
527do {									\
528   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
529   GLuint tmp = *_ptr;				\
530   tmp &= 0x000000ff;							\
531   tmp |= ((d << 8) & 0xffffff00);					\
532   *_ptr = tmp;					\
533} while (0)
534#elif defined(RADEON_R600)
535#define WRITE_DEPTH( _x, _y, d )					\
536do {									\
537   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
538   GLuint tmp = *_ptr;				\
539   tmp &= 0xff000000;							\
540   tmp |= ((d) & 0x00ffffff);					\
541   *_ptr = tmp;					\
542} while (0)
543#elif defined(RADEON_R200)
544#define WRITE_DEPTH( _x, _y, d )					\
545do {									\
546   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
547   GLuint tmp = *_ptr;				\
548   tmp &= 0xff000000;							\
549   tmp |= ((d) & 0x00ffffff);						\
550   *_ptr = tmp;					\
551} while (0)
552#else
553#define WRITE_DEPTH( _x, _y, d )					\
554do {									\
555   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
556   GLuint tmp = *_ptr;							\
557   tmp &= 0xff000000;							\
558   tmp |= ((d) & 0x00ffffff);						\
559   *_ptr = tmp;					\
560} while (0)
561#endif
562
563#if defined(RADEON_R300)
564#define READ_DEPTH( d, _x, _y )						\
565  do {									\
566    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
567  }while(0)
568#elif defined(RADEON_R600)
569#define READ_DEPTH( d, _x, _y )						\
570  do {									\
571    d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
572  }while(0)
573#elif defined(RADEON_R200)
574#define READ_DEPTH( d, _x, _y )						\
575  do {									\
576    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
577  }while(0)
578#else
579#define READ_DEPTH( d, _x, _y )	\
580  d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off)) & 0x00ffffff;
581#endif
582
583#define TAG(x) radeon##x##_z24
584#include "depthtmp.h"
585
586/* 24 bit depth, 8 bit stencil depthbuffer functions
587 * EXT_depth_stencil
588 *
589 * Careful: It looks like the R300 uses ZZZS byte order while the R200
590 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
591 */
592#define VALUE_TYPE GLuint
593
594#if defined(RADEON_R300)
595#define WRITE_DEPTH( _x, _y, d )					\
596do {									\
597   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
598   *_ptr = d;								\
599} while (0)
600#elif defined(RADEON_R600)
601#define WRITE_DEPTH( _x, _y, d )					\
602do {									\
603   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
604   GLuint tmp = *_ptr;				\
605   tmp &= 0xff000000;							\
606   tmp |= (((d) >> 8) & 0x00ffffff);					\
607   *_ptr = tmp;					\
608   _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
609   tmp = *_ptr;				\
610   tmp &= 0xffffff00;							\
611   tmp |= (d) & 0xff;							\
612   *_ptr = tmp;					\
613} while (0)
614#elif defined(RADEON_R200)
615#define WRITE_DEPTH( _x, _y, d )					\
616do {									\
617   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
618   *_ptr = d;								\
619} while (0)
620#else
621#define WRITE_DEPTH( _x, _y, d )					\
622do {									\
623   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
624   *_ptr = d;					\
625} while (0)
626#endif
627
628#if defined(RADEON_R300)
629#define READ_DEPTH( d, _x, _y )						\
630  do { \
631    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));	\
632  }while(0)
633#elif defined(RADEON_R600)
634#define READ_DEPTH( d, _x, _y )						\
635  do { \
636    d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
637    d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff;	\
638  }while(0)
639#elif defined(RADEON_R200)
640#define READ_DEPTH( d, _x, _y )						\
641  do { \
642    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off));	\
643  }while(0)
644#else
645#define READ_DEPTH( d, _x, _y )	do {					\
646    d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off )); \
647  } while (0)
648#endif
649
650#define TAG(x) radeon##x##_s8_z24
651#include "depthtmp.h"
652
653/* ================================================================
654 * Stencil buffer
655 */
656
657/* 24 bit depth, 8 bit stencil depthbuffer functions
658 */
659#ifdef RADEON_R300
660#define WRITE_STENCIL( _x, _y, d )					\
661do {									\
662   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
663   GLuint tmp = *_ptr;				\
664   tmp &= 0xffffff00;							\
665   tmp |= (d) & 0xff;							\
666   *_ptr = tmp;					\
667} while (0)
668#elif defined(RADEON_R600)
669#define WRITE_STENCIL( _x, _y, d )					\
670do {									\
671   GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
672   GLuint tmp = *_ptr;				\
673   tmp &= 0xffffff00;							\
674   tmp |= (d) & 0xff;							\
675   *_ptr = tmp;					\
676} while (0)
677#elif defined(RADEON_R200)
678#define WRITE_STENCIL( _x, _y, d )					\
679do {									\
680   GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);		\
681   GLuint tmp = *_ptr;				\
682   tmp &= 0x00ffffff;							\
683   tmp |= (((d) & 0xff) << 24);						\
684   *_ptr = tmp;					\
685} while (0)
686#else
687#define WRITE_STENCIL( _x, _y, d )					\
688do {									\
689   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
690   GLuint tmp = *_ptr;				\
691   tmp &= 0x00ffffff;							\
692   tmp |= (((d) & 0xff) << 24);						\
693   *_ptr = tmp;					\
694} while (0)
695#endif
696
697#ifdef RADEON_R300
698#define READ_STENCIL( d, _x, _y )					\
699do {									\
700   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
701   GLuint tmp = *_ptr;				\
702   d = tmp & 0x000000ff;						\
703} while (0)
704#elif defined(RADEON_R600)
705#define READ_STENCIL( d, _x, _y )					\
706do {									\
707   GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off );		\
708   GLuint tmp = *_ptr;				\
709   d = tmp & 0x000000ff;						\
710} while (0)
711#elif defined(RADEON_R200)
712#define READ_STENCIL( d, _x, _y )					\
713do {									\
714   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
715   GLuint tmp = *_ptr;				\
716   d = (tmp & 0xff000000) >> 24;					\
717} while (0)
718#else
719#define READ_STENCIL( d, _x, _y )					\
720do {									\
721   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
722   GLuint tmp = *_ptr;				\
723   d = (tmp & 0xff000000) >> 24;					\
724} while (0)
725#endif
726
727#define TAG(x) radeon##x##_s8_z24
728#include "stenciltmp.h"
729
730
731static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
732{
733	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
734	int r;
735
736	if (rrb == NULL || !rrb->bo)
737		return;
738
739	if (flag) {
740		if (rrb->bo->bom->funcs->bo_wait)
741			radeon_bo_wait(rrb->bo);
742		r = radeon_bo_map(rrb->bo, 1);
743		if (r) {
744			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
745				__FUNCTION__, r);
746		}
747
748		radeonSetSpanFunctions(rrb);
749	} else {
750		radeon_bo_unmap(rrb->bo);
751		rb->GetRow = NULL;
752		rb->PutRow = NULL;
753	}
754}
755
756static void
757radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
758{
759	GLuint i, j;
760
761	/* color draw buffers */
762	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
763		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
764
765	/* check for render to textures */
766	for (i = 0; i < BUFFER_COUNT; i++) {
767		struct gl_renderbuffer_attachment *att =
768			ctx->DrawBuffer->Attachment + i;
769		struct gl_texture_object *tex = att->Texture;
770		if (tex) {
771			/* Render to texture. Note that a mipmapped texture need not
772			 * be complete for render to texture, so we must restrict to
773			 * mapping only the attached image.
774			 */
775			radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
776			ASSERT(att->Renderbuffer);
777
778			if (map)
779				radeon_teximage_map(image, GL_TRUE);
780			else
781				radeon_teximage_unmap(image);
782		}
783	}
784
785	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
786
787	/* depth buffer (Note wrapper!) */
788	if (ctx->DrawBuffer->_DepthBuffer)
789		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
790
791	if (ctx->DrawBuffer->_StencilBuffer)
792		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
793}
794
795static void radeonSpanRenderStart(GLcontext * ctx)
796{
797	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
798	int i;
799
800	radeon_firevertices(rmesa);
801
802	/* The locking and wait for idle should really only be needed in classic mode.
803	 * In a future memory manager based implementation, this should become
804	 * unnecessary due to the fact that mapping our buffers, textures, etc.
805	 * should implicitly wait for any previous rendering commands that must
806	 * be waited on. */
807	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
808		LOCK_HARDWARE(rmesa);
809		radeonWaitForIdleLocked(rmesa);
810	}
811
812	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
813		if (ctx->Texture.Unit[i]._ReallyEnabled)
814			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
815	}
816
817	radeon_map_unmap_buffers(ctx, 1);
818}
819
820static void radeonSpanRenderFinish(GLcontext * ctx)
821{
822	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
823	int i;
824	_swrast_flush(ctx);
825	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
826		UNLOCK_HARDWARE(rmesa);
827	}
828	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
829		if (ctx->Texture.Unit[i]._ReallyEnabled)
830			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
831	}
832
833	radeon_map_unmap_buffers(ctx, 0);
834}
835
836void radeonInitSpanFuncs(GLcontext * ctx)
837{
838	struct swrast_device_driver *swdd =
839	    _swrast_GetDeviceDriverReference(ctx);
840	swdd->SpanRenderStart = radeonSpanRenderStart;
841	swdd->SpanRenderFinish = radeonSpanRenderFinish;
842}
843
844/**
845 * Plug in the Get/Put routines for the given driRenderbuffer.
846 */
847static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
848{
849	if (rrb->base.Format == MESA_FORMAT_RGB565) {
850		radeonInitPointers_RGB565(&rrb->base);
851	} else if (rrb->base.Format == MESA_FORMAT_RGBA8888) { /* XXX */
852		radeonInitPointers_xRGB8888(&rrb->base);
853	} else if (rrb->base.Format == MESA_FORMAT_RGBA8888) {
854		radeonInitPointers_ARGB8888(&rrb->base);
855	} else if (rrb->base.Format == MESA_FORMAT_ARGB4444) {
856		radeonInitPointers_ARGB4444(&rrb->base);
857	} else if (rrb->base.Format == MESA_FORMAT_ARGB1555) {
858		radeonInitPointers_ARGB1555(&rrb->base);
859	} else if (rrb->base.Format == MESA_FORMAT_Z16) {
860		radeonInitDepthPointers_z16(&rrb->base);
861	} else if (rrb->base.Format == GL_DEPTH_COMPONENT32) { /* XXX */
862		radeonInitDepthPointers_z24(&rrb->base);
863	} else if (rrb->base.Format == MESA_FORMAT_S8_Z24) {
864		radeonInitDepthPointers_s8_z24(&rrb->base);
865	} else if (rrb->base.Format == MESA_FORMAT_S8) {
866		radeonInitStencilPointers_s8_z24(&rrb->base);
867	} else {
868		fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format);
869	}
870}
871