radeon_span.c revision 364ca57aff733e8ee5f417b3f8719514f443315f
1/**************************************************************************
2
3Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
5                     VA Linux Systems Inc., Fremont, California.
6
7The Weather Channel (TM) funded Tungsten Graphics to develop the
8initial release of the Radeon 8500 driver under the XFree86 license.
9This notice must be preserved.
10
11All Rights Reserved.
12
13Permission is hereby granted, free of charge, to any person obtaining
14a copy of this software and associated documentation files (the
15"Software"), to deal in the Software without restriction, including
16without limitation the rights to use, copy, modify, merge, publish,
17distribute, sublicense, and/or sell copies of the Software, and to
18permit persons to whom the Software is furnished to do so, subject to
19the following conditions:
20
21The above copyright notice and this permission notice (including the
22next paragraph) shall be included in all copies or substantial
23portions of the Software.
24
25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
29LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
30OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
31WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32
33**************************************************************************/
34
35/*
36 * Authors:
37 *   Kevin E. Martin <martin@valinux.com>
38 *   Gareth Hughes <gareth@valinux.com>
39 *   Keith Whitwell <keith@tungstengraphics.com>
40 *
41 */
42
43#include "main/glheader.h"
44#include "swrast/swrast.h"
45
46#include "radeon_common.h"
47#include "radeon_lock.h"
48#include "radeon_span.h"
49
50#define DBG 0
51
52static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
53
54
55/* r200 depth buffer is always tiled - this is the formula
56   according to the docs unless I typo'ed in it
57*/
58#if defined(RADEON_COMMON_FOR_R200)
59static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb,
60				 GLint x, GLint y)
61{
62    GLubyte *ptr = rrb->bo->ptr;
63    GLint offset;
64    if (rrb->has_surface) {
65	offset = x * rrb->cpp + y * rrb->pitch;
66    } else {
67	GLuint b;
68	offset = 0;
69	b = (((y  >> 4) * (rrb->pitch >> 8) + (x >> 6)));
70	offset += (b >> 1) << 12;
71	offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
72	offset += ((y >> 2) & 0x3) << 9;
73	offset += ((x >> 3) & 0x1) << 8;
74	offset += ((x >> 4) & 0x3) << 6;
75	offset += ((x >> 2) & 0x1) << 5;
76	offset += ((y >> 1) & 0x1) << 4;
77	offset += ((x >> 1) & 0x1) << 3;
78	offset += (y & 0x1) << 2;
79	offset += (x & 0x1) << 1;
80    }
81    return &ptr[offset];
82}
83
84static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb,
85				 GLint x, GLint y)
86{
87    GLubyte *ptr = rrb->bo->ptr;
88    GLint offset;
89    if (rrb->has_surface) {
90	offset = x * rrb->cpp + y * rrb->pitch;
91    } else {
92	GLuint b;
93	offset = 0;
94	b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5));
95	offset += (b >> 1) << 12;
96	offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11;
97	offset += ((y >> 2) & 0x3) << 9;
98	offset += ((x >> 2) & 0x1) << 8;
99	offset += ((x >> 3) & 0x3) << 6;
100	offset += ((y >> 1) & 0x1) << 5;
101	offset += ((x >> 1) & 0x1) << 4;
102	offset += (y & 0x1) << 3;
103	offset += (x & 0x1) << 2;
104    }
105    return &ptr[offset];
106}
107#endif
108
109/* r600 tiling
110 * two main types:
111 * - 1D (akin to macro-linear/micro-tiled on older asics)
112 * - 2D (akin to macro-tiled/micro-tiled on older asics)
113 * only 1D tiling is implemented below
114 */
115#if defined(RADEON_COMMON_FOR_R600)
116static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb,
117				 GLint x, GLint y, GLint is_depth, GLint is_stencil)
118{
119    GLint element_bytes = rrb->cpp;
120    GLint num_samples = 1;
121    GLint tile_width = 8;
122    GLint tile_height = 8;
123    GLint tile_thickness = 1;
124    GLint pitch_elements = rrb->pitch / element_bytes;
125    GLint height = rrb->base.Height;
126    GLint z = 0;
127    GLint sample_number = 0;
128    /* */
129    GLint tile_bytes;
130    GLint tiles_per_row;
131    GLint tiles_per_slice;
132    GLint slice_offset;
133    GLint tile_row_index;
134    GLint tile_column_index;
135    GLint tile_offset;
136    GLint pixel_number = 0;
137    GLint element_offset;
138    GLint offset = 0;
139
140    tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples;
141    tiles_per_row = pitch_elements /tile_width;
142    tiles_per_slice = tiles_per_row * (height / tile_height);
143    slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes;
144    tile_row_index = y / tile_height;
145    tile_column_index = x / tile_width;
146    tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes;
147
148    if (is_depth) {
149	    GLint pixel_offset = 0;
150
151	    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
152	    pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0]
153	    pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1]
154	    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
155	    pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2]
156	    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
157	    switch (element_bytes) {
158	    case 2:
159		    pixel_offset = pixel_number * element_bytes * num_samples;
160		    element_offset = pixel_offset + (sample_number * element_bytes);
161		    break;
162	    case 4:
163		    /* stencil and depth data are stored separately within a tile.
164		     * stencil is stored in a contiguous tile before the depth tile.
165		     * stencil element is 1 byte, depth element is 3 bytes.
166		     * stencil tile is 64 bytes.
167		     */
168		    if (is_stencil)
169			    pixel_offset = pixel_number * 1 * num_samples;
170		    else
171			    pixel_offset = (pixel_number * 3 * num_samples) + 64;
172		    break;
173	    }
174	    element_offset = pixel_offset + (sample_number * element_bytes);
175    } else {
176	    GLint sample_offset;
177
178	    switch (element_bytes) {
179	    case 1:
180		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
181		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
182		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
183		    pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1]
184		    pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0]
185		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
186		    break;
187	    case 2:
188		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
189		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
190		    pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2]
191		    pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0]
192		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
193		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
194		    break;
195	    case 4:
196		    pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0]
197		    pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1]
198		    pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0]
199		    pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2]
200		    pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1]
201		    pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2]
202		    break;
203	    }
204	    sample_offset = sample_number * (tile_bytes / num_samples);
205	    element_offset = sample_offset + (pixel_number * element_bytes);
206    }
207    offset = slice_offset + tile_offset + element_offset;
208    return offset;
209}
210
211/* depth buffers */
212static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb,
213			       GLint x, GLint y)
214{
215    GLubyte *ptr = rrb->bo->ptr;
216    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0);
217    return &ptr[offset];
218}
219
220static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb,
221				 GLint x, GLint y)
222{
223    GLubyte *ptr = rrb->bo->ptr;
224    GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1);
225    return &ptr[offset];
226}
227
228static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb,
229			       GLint x, GLint y)
230{
231    GLubyte *ptr = rrb->bo->ptr;
232    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
233    GLint offset;
234
235    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
236        offset = x * rrb->cpp + y * rrb->pitch;
237    } else {
238	    offset = r600_1d_tile_helper(rrb, x, y, 0, 0);
239    }
240    return &ptr[offset];
241}
242
243#endif
244
245/* radeon tiling on r300-r500 has 4 states,
246   macro-linear/micro-linear
247   macro-linear/micro-tiled
248   macro-tiled /micro-linear
249   macro-tiled /micro-tiled
250   1 byte surface
251   2 byte surface - two types - we only provide 8x2 microtiling
252   4 byte surface
253   8/16 byte (unused)
254*/
255static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb,
256			     GLint x, GLint y)
257{
258    GLubyte *ptr = rrb->bo->ptr;
259    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
260    GLint offset;
261
262    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
263        offset = x * rrb->cpp + y * rrb->pitch;
264    } else {
265        offset = 0;
266        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
267	    if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
268		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11;
269		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10;
270		offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9;
271		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8;
272		offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7;
273		offset += ((y >> 1) & 0x1) << 6;
274		offset += ((x >> 2) & 0x1) << 5;
275		offset += (y & 1) << 4;
276		offset += (x & 3) << 2;
277            } else {
278		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11;
279		offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10;
280		offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9;
281		offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8;
282		offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7;
283		offset += (y & 1) << 6;
284		offset += (x & 15) << 2;
285            }
286        } else {
287	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5;
288	    offset += (y & 1) << 4;
289	    offset += (x & 3) << 2;
290        }
291    }
292    return &ptr[offset];
293}
294
295static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb,
296				     GLint x, GLint y)
297{
298    GLubyte *ptr = rrb->bo->ptr;
299    uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
300    GLint offset;
301
302    if (rrb->has_surface || !(rrb->bo->flags & mask)) {
303        offset = x * rrb->cpp + y * rrb->pitch;
304    } else {
305        offset = 0;
306        if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
307            if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
308		offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11;
309		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10;
310		offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9;
311		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8;
312		offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7;
313		offset += ((y >> 1) & 0x1) << 6;
314		offset += ((x >> 3) & 0x1) << 5;
315		offset += (y & 1) << 4;
316		offset += (x & 3) << 2;
317            } else {
318		offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11;
319		offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10;
320		offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9;
321		offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8;
322		offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7;
323		offset += (y & 1) << 6;
324		offset += ((x >> 4) & 0x1) << 5;
325                offset += (x & 15) << 2;
326            }
327        } else {
328	    offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5;
329	    offset += (y & 0x1) << 4;
330	    offset += (x & 0x7) << 1;
331        }
332    }
333    return &ptr[offset];
334}
335
336#ifndef COMPILE_R300
/* Rotate a packed 32-bit value right by 8 bits: the low byte (stencil in
 * Z24S8) moves to the top byte and the upper 24 bits drop to the bottom. */
static uint32_t
z24s8_to_s8z24(uint32_t val)
{
   const uint32_t top_byte = val << 24;
   const uint32_t low_bits = val >> 8;

   return top_byte | low_bits;
}
342
/* Rotate a packed 32-bit value left by 8 bits: the top byte (stencil in
 * S8Z24) moves to the low byte and the lower 24 bits move up. */
static uint32_t
s8z24_to_z24s8(uint32_t val)
{
   const uint32_t low_byte = val >> 24;
   const uint32_t high_bits = val << 8;

   return low_byte | high_bits;
}
348#endif
349
350/*
351 * Note that all information needed to access pixels in a renderbuffer
352 * should be obtained through the gl_renderbuffer parameter, not per-context
353 * information.
354 */
/* Declarations common to every span function.  Expects `ctx` and `rb` to be
 * in scope.  yScale/yBias are 1/0 when ctx->DrawBuffer->Name is non-zero and
 * -1/(Height - 1) otherwise; they feed Y_FLIP below.
 */
#define RADEON_SPAN_DECLS						\
   struct radeon_context *radeon = RADEON_CONTEXT(ctx);			\
   struct radeon_renderbuffer *rrb = (void *) rb;			\
   const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;			\
   const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
   unsigned int num_cliprects;						\
   struct drm_clip_rect *cliprects;					\
   int x_off, y_off;

/* Fills in the cliprect list and offsets declared by RADEON_SPAN_DECLS. */
#define RADEON_SPAN_GET_CLIPRECTS					\
   radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

/* Color spans additionally declare the scratch pixel `p`. */
#define LOCAL_VARS							\
   RADEON_SPAN_DECLS							\
   GLuint p;								\
   (void)p;								\
   RADEON_SPAN_GET_CLIPRECTS

#define LOCAL_DEPTH_VARS						\
   RADEON_SPAN_DECLS							\
   RADEON_SPAN_GET_CLIPRECTS

#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

/* Map a row number through the yScale/yBias pair declared above. */
#define Y_FLIP(_y) ((_y) * yScale + yBias)

/* No per-span locking is done here: both hooks expand to nothing. */
#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 *
 * Opens a do { while { pair that walks the cliprects from last to first;
 * the matching close is not defined in this file.
 */
#define HW_CLIPLOOP()							\
   do {									\
      int _nc = num_cliprects;						\
      while ( _nc-- ) {							\
	 int minx = cliprects[_nc].x1 - x_off;				\
	 int miny = cliprects[_nc].y1 - y_off;				\
	 int maxx = cliprects[_nc].x2 - x_off;				\
	 int maxy = cliprects[_nc].y2 - y_off;
397
398/* ================================================================
399 * Color buffer
400 */
401
402/* 16 bit, RGB565 color spanline and pixel functions
403 */
404#define SPANTMP_PIXEL_FMT GL_RGB
405#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5
406
407#define TAG(x)    radeon##x##_RGB565
408#define TAG2(x,y) radeon##x##_RGB565##y
409#if defined(RADEON_COMMON_FOR_R600)
410#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
411#else
412#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
413#endif
414#include "spantmp2.h"
415
416/* 16 bit, ARGB1555 color spanline and pixel functions
417 */
418#define SPANTMP_PIXEL_FMT GL_BGRA
419#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV
420
421#define TAG(x)    radeon##x##_ARGB1555
422#define TAG2(x,y) radeon##x##_ARGB1555##y
423#if defined(RADEON_COMMON_FOR_R600)
424#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
425#else
426#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
427#endif
428#include "spantmp2.h"
429
430/* 16 bit, RGBA4 color spanline and pixel functions
431 */
432#define SPANTMP_PIXEL_FMT GL_BGRA
433#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV
434
435#define TAG(x)    radeon##x##_ARGB4444
436#define TAG2(x,y) radeon##x##_ARGB4444##y
437#if defined(RADEON_COMMON_FOR_R600)
438#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off)
439#else
440#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off)
441#endif
442#include "spantmp2.h"
443
444/* 32 bit, xRGB8888 color spanline and pixel functions
445 */
446#define SPANTMP_PIXEL_FMT GL_BGRA
447#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
448
449#define TAG(x)    radeon##x##_xRGB8888
450#define TAG2(x,y) radeon##x##_xRGB8888##y
451#if defined(RADEON_COMMON_FOR_R600)
452#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000))
453#define PUT_VALUE(_x, _y, d) { \
454   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
455   *_ptr = d;								\
456} while (0)
457#else
458#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000))
459#define PUT_VALUE(_x, _y, d) { \
460   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
461   *_ptr = d;								\
462} while (0)
463#endif
464#include "spantmp2.h"
465
466/* 32 bit, ARGB8888 color spanline and pixel functions
467 */
468#define SPANTMP_PIXEL_FMT GL_BGRA
469#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV
470
471#define TAG(x)    radeon##x##_ARGB8888
472#define TAG2(x,y) radeon##x##_ARGB8888##y
473#if defined(RADEON_COMMON_FOR_R600)
474#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)))
475#define PUT_VALUE(_x, _y, d) { \
476   GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off );		\
477   *_ptr = d;								\
478} while (0)
479#else
480#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)))
481#define PUT_VALUE(_x, _y, d) { \
482   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
483   *_ptr = d;								\
484} while (0)
485#endif
486#include "spantmp2.h"
487
488/* ================================================================
489 * Depth buffer
490 */
491
492/* The Radeon family has depth tiling on all the time, so we have to convert
493 * the x,y coordinates into the memory bus address (mba) in the same
494 * manner as the engine.  In each case, the linear block address (ba)
495 * is calculated, and then wired with x and y to produce the final
496 * memory address.
497 * The chip will do address translation on its own if the surface registers
498 * are set up correctly. It is not quite enough to get it working with hyperz
499 * too...
500 */
501
502/* 16-bit depth buffer functions
503 */
504#define VALUE_TYPE GLushort
505
506#if defined(RADEON_COMMON_FOR_R200)
507#define WRITE_DEPTH( _x, _y, d )					\
508   *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d
509#elif defined(RADEON_COMMON_FOR_R600)
510#define WRITE_DEPTH( _x, _y, d )					\
511   *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d
512#else
513#define WRITE_DEPTH( _x, _y, d )					\
514   *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d
515#endif
516
517#if defined(RADEON_COMMON_FOR_R200)
518#define READ_DEPTH( d, _x, _y )						\
519   d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off)
520#elif defined(RADEON_COMMON_FOR_R600)
521#define READ_DEPTH( d, _x, _y )						\
522   d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off)
523#else
524#define READ_DEPTH( d, _x, _y )						\
525   d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off)
526#endif
527
528#define TAG(x) radeon##x##_z16
529#include "depthtmp.h"
530
531/* 24 bit depth
532 *
533 * Careful: It looks like the R300 uses ZZZS byte order while the R200
534 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
535 */
536#define VALUE_TYPE GLuint
537
538#if defined(COMPILE_R300)
539#define WRITE_DEPTH( _x, _y, d )					\
540do {									\
541   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
542   GLuint tmp = *_ptr;				\
543   tmp &= 0x000000ff;							\
544   tmp |= ((d << 8) & 0xffffff00);					\
545   *_ptr = tmp;					\
546} while (0)
547#elif defined(RADEON_COMMON_FOR_R600)
548#define WRITE_DEPTH( _x, _y, d )					\
549do {									\
550   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
551   GLuint tmp = *_ptr;				\
552   tmp &= 0xff000000;							\
553   tmp |= ((d) & 0x00ffffff);					\
554   *_ptr = tmp;					\
555} while (0)
556#elif defined(RADEON_COMMON_FOR_R200)
557#define WRITE_DEPTH( _x, _y, d )					\
558do {									\
559   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
560   GLuint tmp = *_ptr;				\
561   tmp &= 0xff000000;							\
562   tmp |= ((d) & 0x00ffffff);						\
563   *_ptr = tmp;					\
564} while (0)
565#else
566#define WRITE_DEPTH( _x, _y, d )					\
567do {									\
568   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
569   GLuint tmp = *_ptr;							\
570   tmp &= 0xff000000;							\
571   tmp |= ((d) & 0x00ffffff);						\
572   *_ptr = tmp;					\
573} while (0)
574#endif
575
576#if defined(COMPILE_R300)
577#define READ_DEPTH( d, _x, _y )						\
578  do {									\
579    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
580  }while(0)
581#elif defined(RADEON_COMMON_FOR_R600)
582#define READ_DEPTH( d, _x, _y )						\
583  do {									\
584    d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \
585  }while(0)
586#elif defined(RADEON_COMMON_FOR_R200)
587#define READ_DEPTH( d, _x, _y )						\
588  do {									\
589    d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \
590  }while(0)
591#else
592#define READ_DEPTH( d, _x, _y )	\
593  d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off)) & 0x00ffffff;
594#endif
595
596#define TAG(x) radeon##x##_z24
597#include "depthtmp.h"
598
599/* 24 bit depth, 8 bit stencil depthbuffer functions
600 * EXT_depth_stencil
601 *
602 * Careful: It looks like the R300 uses ZZZS byte order while the R200
603 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
604 */
605#define VALUE_TYPE GLuint
606
607#if defined(COMPILE_R300)
608#define WRITE_DEPTH( _x, _y, d )					\
609do {									\
610   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
611   *_ptr = d;								\
612} while (0)
613#elif defined(RADEON_COMMON_FOR_R600)
614#define WRITE_DEPTH( _x, _y, d )					\
615do {									\
616   GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off );		\
617   GLuint tmp = *_ptr;				\
618   tmp &= 0xff000000;							\
619   tmp |= (((d) >> 8) & 0x00ffffff);					\
620   *_ptr = tmp;					\
621   _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
622   tmp = *_ptr;				\
623   tmp &= 0xffffff00;							\
624   tmp |= (d) & 0xff;							\
625   *_ptr = tmp;					\
626} while (0)
627#elif defined(RADEON_COMMON_FOR_R200)
628#define WRITE_DEPTH( _x, _y, d )					\
629do {									\
630   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
631   GLuint tmp = z24s8_to_s8z24(d);					\
632   *_ptr = tmp;								\
633} while (0)
634#else
635#define WRITE_DEPTH( _x, _y, d )					\
636do {									\
637   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );	\
638   GLuint tmp = z24s8_to_s8z24(d);					\
639   *_ptr = tmp;					\
640} while (0)
641#endif
642
643#if defined(COMPILE_R300)
644#define READ_DEPTH( d, _x, _y )						\
645  do { \
646    d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)));	\
647  }while(0)
648#elif defined(RADEON_COMMON_FOR_R600)
649#define READ_DEPTH( d, _x, _y )						\
650  do { \
651    d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \
652    d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff;	\
653  }while(0)
654#elif defined(RADEON_COMMON_FOR_R200)
655#define READ_DEPTH( d, _x, _y )						\
656  do { \
657    d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)));	\
658  }while(0)
659#else
660#define READ_DEPTH( d, _x, _y )	do {					\
661    d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off,	_y + y_off ))); \
662  } while (0)
663#endif
664
665#define TAG(x) radeon##x##_z24_s8
666#include "depthtmp.h"
667
668/* ================================================================
669 * Stencil buffer
670 */
671
672/* 24 bit depth, 8 bit stencil depthbuffer functions
673 */
674#ifdef COMPILE_R300
675#define WRITE_STENCIL( _x, _y, d )					\
676do {									\
677   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
678   GLuint tmp = *_ptr;				\
679   tmp &= 0xffffff00;							\
680   tmp |= (d) & 0xff;							\
681   *_ptr = tmp;					\
682} while (0)
683#elif defined(RADEON_COMMON_FOR_R600)
684#define WRITE_STENCIL( _x, _y, d )					\
685do {									\
686   GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off);		\
687   GLuint tmp = *_ptr;				\
688   tmp &= 0xffffff00;							\
689   tmp |= (d) & 0xff;							\
690   *_ptr = tmp;					\
691} while (0)
692#elif defined(RADEON_COMMON_FOR_R200)
693#define WRITE_STENCIL( _x, _y, d )					\
694do {									\
695   GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off);		\
696   GLuint tmp = *_ptr;				\
697   tmp &= 0x00ffffff;							\
698   tmp |= (((d) & 0xff) << 24);						\
699   *_ptr = tmp;					\
700} while (0)
701#else
702#define WRITE_STENCIL( _x, _y, d )					\
703do {									\
704   GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off);		\
705   GLuint tmp = *_ptr;				\
706   tmp &= 0x00ffffff;							\
707   tmp |= (((d) & 0xff) << 24);						\
708   *_ptr = tmp;					\
709} while (0)
710#endif
711
712#ifdef COMPILE_R300
713#define READ_STENCIL( d, _x, _y )					\
714do {									\
715   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
716   GLuint tmp = *_ptr;				\
717   d = tmp & 0x000000ff;						\
718} while (0)
719#elif defined(RADEON_COMMON_FOR_R600)
720#define READ_STENCIL( d, _x, _y )					\
721do {									\
722   GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off );		\
723   GLuint tmp = *_ptr;				\
724   d = tmp & 0x000000ff;						\
725} while (0)
726#elif defined(RADEON_COMMON_FOR_R200)
727#define READ_STENCIL( d, _x, _y )					\
728do {									\
729   GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off );		\
730   GLuint tmp = *_ptr;				\
731   d = (tmp & 0xff000000) >> 24;					\
732} while (0)
733#else
734#define READ_STENCIL( d, _x, _y )					\
735do {									\
736   GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off );		\
737   GLuint tmp = *_ptr;				\
738   d = (tmp & 0xff000000) >> 24;					\
739} while (0)
740#endif
741
742#define TAG(x) radeon##x##_z24_s8
743#include "stenciltmp.h"
744
745
746static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
747{
748	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
749	int r;
750
751	if (rrb == NULL || !rrb->bo)
752		return;
753
754	if (flag) {
755		if (rrb->bo->bom->funcs->bo_wait)
756			radeon_bo_wait(rrb->bo);
757		r = radeon_bo_map(rrb->bo, 1);
758		if (r) {
759			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
760				__FUNCTION__, r);
761		}
762
763		radeonSetSpanFunctions(rrb);
764	} else {
765		radeon_bo_unmap(rrb->bo);
766		rb->GetRow = NULL;
767		rb->PutRow = NULL;
768	}
769}
770
771static void
772radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
773{
774	GLuint i, j;
775
776	/* color draw buffers */
777	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
778		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);
779
780	/* check for render to textures */
781	for (i = 0; i < BUFFER_COUNT; i++) {
782		struct gl_renderbuffer_attachment *att =
783			ctx->DrawBuffer->Attachment + i;
784		struct gl_texture_object *tex = att->Texture;
785		if (tex) {
786			/* Render to texture. Note that a mipmapped texture need not
787			 * be complete for render to texture, so we must restrict to
788			 * mapping only the attached image.
789			 */
790			radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]);
791			ASSERT(att->Renderbuffer);
792
793			if (map)
794				radeon_teximage_map(image, GL_TRUE);
795			else
796				radeon_teximage_unmap(image);
797		}
798	}
799
800	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);
801
802	/* depth buffer (Note wrapper!) */
803	if (ctx->DrawBuffer->_DepthBuffer)
804		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);
805
806	if (ctx->DrawBuffer->_StencilBuffer)
807		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
808}
809
810static void radeonSpanRenderStart(GLcontext * ctx)
811{
812	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
813	int i;
814
815	radeon_firevertices(rmesa);
816
817	/* The locking and wait for idle should really only be needed in classic mode.
818	 * In a future memory manager based implementation, this should become
819	 * unnecessary due to the fact that mapping our buffers, textures, etc.
820	 * should implicitly wait for any previous rendering commands that must
821	 * be waited on. */
822	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
823		LOCK_HARDWARE(rmesa);
824		radeonWaitForIdleLocked(rmesa);
825	}
826
827	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
828		if (ctx->Texture.Unit[i]._ReallyEnabled)
829			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
830	}
831
832	radeon_map_unmap_buffers(ctx, 1);
833}
834
835static void radeonSpanRenderFinish(GLcontext * ctx)
836{
837	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
838	int i;
839	_swrast_flush(ctx);
840	if (!rmesa->radeonScreen->driScreen->dri2.enabled) {
841		UNLOCK_HARDWARE(rmesa);
842	}
843	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
844		if (ctx->Texture.Unit[i]._ReallyEnabled)
845			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
846	}
847
848	radeon_map_unmap_buffers(ctx, 0);
849}
850
851void radeonInitSpanFuncs(GLcontext * ctx)
852{
853	struct swrast_device_driver *swdd =
854	    _swrast_GetDeviceDriverReference(ctx);
855	swdd->SpanRenderStart = radeonSpanRenderStart;
856	swdd->SpanRenderFinish = radeonSpanRenderFinish;
857}
858
859/**
860 * Plug in the Get/Put routines for the given driRenderbuffer.
861 */
862static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
863{
864	if (rrb->base._ActualFormat == GL_RGB5) {
865		radeonInitPointers_RGB565(&rrb->base);
866	} else if (rrb->base._ActualFormat == GL_RGB8) {
867		radeonInitPointers_xRGB8888(&rrb->base);
868	} else if (rrb->base._ActualFormat == GL_RGBA8) {
869		radeonInitPointers_ARGB8888(&rrb->base);
870	} else if (rrb->base._ActualFormat == GL_RGBA4) {
871		radeonInitPointers_ARGB4444(&rrb->base);
872	} else if (rrb->base._ActualFormat == GL_RGB5_A1) {
873		radeonInitPointers_ARGB1555(&rrb->base);
874	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
875		radeonInitDepthPointers_z16(&rrb->base);
876	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
877		radeonInitDepthPointers_z24(&rrb->base);
878	} else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
879		radeonInitDepthPointers_z24_s8(&rrb->base);
880	} else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
881		radeonInitStencilPointers_z24_s8(&rrb->base);
882	} else {
883		fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat);
884	}
885}
886