radeon_span.c revision 364ca57aff733e8ee5f417b3f8719514f443315f
1/************************************************************************** 2 3Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved. 4Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and 5 VA Linux Systems Inc., Fremont, California. 6 7The Weather Channel (TM) funded Tungsten Graphics to develop the 8initial release of the Radeon 8500 driver under the XFree86 license. 9This notice must be preserved. 10 11All Rights Reserved. 12 13Permission is hereby granted, free of charge, to any person obtaining 14a copy of this software and associated documentation files (the 15"Software"), to deal in the Software without restriction, including 16without limitation the rights to use, copy, modify, merge, publish, 17distribute, sublicense, and/or sell copies of the Software, and to 18permit persons to whom the Software is furnished to do so, subject to 19the following conditions: 20 21The above copyright notice and this permission notice (including the 22next paragraph) shall be included in all copies or substantial 23portions of the Software. 24 25THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 26EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 27MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 28IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 29LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 30OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 31WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 32 33**************************************************************************/ 34 35/* 36 * Authors: 37 * Kevin E. Martin <martin@valinux.com> 38 * Gareth Hughes <gareth@valinux.com> 39 * Keith Whitwell <keith@tungstengraphics.com> 40 * 41 */ 42 43#include "main/glheader.h" 44#include "swrast/swrast.h" 45 46#include "radeon_common.h" 47#include "radeon_lock.h" 48#include "radeon_span.h" 49 50#define DBG 0 51 52static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); 53 54 55/* r200 depth buffer is always tiled - this is the formula 56 according to the docs unless I typo'ed in it 57*/ 58#if defined(RADEON_COMMON_FOR_R200) 59static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, 60 GLint x, GLint y) 61{ 62 GLubyte *ptr = rrb->bo->ptr; 63 GLint offset; 64 if (rrb->has_surface) { 65 offset = x * rrb->cpp + y * rrb->pitch; 66 } else { 67 GLuint b; 68 offset = 0; 69 b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6))); 70 offset += (b >> 1) << 12; 71 offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; 72 offset += ((y >> 2) & 0x3) << 9; 73 offset += ((x >> 3) & 0x1) << 8; 74 offset += ((x >> 4) & 0x3) << 6; 75 offset += ((x >> 2) & 0x1) << 5; 76 offset += ((y >> 1) & 0x1) << 4; 77 offset += ((x >> 1) & 0x1) << 3; 78 offset += (y & 0x1) << 2; 79 offset += (x & 0x1) << 1; 80 } 81 return &ptr[offset]; 82} 83 84static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, 85 GLint x, GLint y) 86{ 87 GLubyte *ptr = rrb->bo->ptr; 88 GLint offset; 89 if (rrb->has_surface) { 90 offset = x * rrb->cpp + y * rrb->pitch; 91 } else { 92 GLuint b; 93 offset = 0; 94 b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5)); 95 offset += (b >> 1) << 12; 96 offset += (((rrb->pitch >> 7) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; 97 offset += ((y >> 2) & 0x3) << 9; 98 offset += ((x >> 2) & 0x1) << 8; 99 offset += ((x >> 3) & 0x3) << 6; 100 offset += ((y >> 1) & 0x1) << 5; 101 offset += ((x >> 1) & 0x1) << 4; 102 offset += (y & 0x1) << 3; 103 offset += (x & 0x1) << 2; 104 } 105 return &ptr[offset]; 106} 107#endif 108 109/* r600 tiling 110 * two main types: 111 * - 1D (akin to macro-linear/micro-tiled on older asics) 112 * - 2D (akin to macro-tiled/micro-tiled on older asics) 113 * only 1D tiling is implemented below 114 */ 115#if defined(RADEON_COMMON_FOR_R600) 116static GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, 117 GLint x, GLint y, GLint is_depth, GLint is_stencil) 118{ 119 GLint element_bytes = rrb->cpp; 120 GLint num_samples = 1; 121 GLint tile_width = 8; 122 GLint tile_height = 8; 123 GLint tile_thickness = 1; 124 GLint pitch_elements = rrb->pitch / element_bytes; 125 GLint height = rrb->base.Height; 126 GLint z = 0; 127 GLint sample_number = 0; 128 /* */ 129 GLint tile_bytes; 130 GLint tiles_per_row; 131 GLint tiles_per_slice; 132 GLint slice_offset; 133 GLint tile_row_index; 134 GLint tile_column_index; 135 GLint tile_offset; 136 GLint pixel_number = 0; 137 GLint element_offset; 138 GLint offset = 0; 139 140 tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; 141 tiles_per_row = pitch_elements /tile_width; 142 tiles_per_slice = tiles_per_row * (height / tile_height); 143 slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes; 144 tile_row_index = y / tile_height; 145 tile_column_index = x / tile_width; 146 tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes; 147 148 if (is_depth) { 149 GLint pixel_offset = 0; 150 151 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 152 pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0] 153 pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1] 154 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] 155 pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2] 156 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 157 switch (element_bytes) { 158 case 2: 159 pixel_offset = pixel_number * element_bytes * num_samples; 160 element_offset = pixel_offset + (sample_number * element_bytes); 161 break; 162 case 4: 163 /* stencil and depth data are stored separately within a tile. 164 * stencil is stored in a contiguous tile before the depth tile. 165 * stencil element is 1 byte, depth element is 3 bytes. 166 * stencil tile is 64 bytes. 167 */ 168 if (is_stencil) 169 pixel_offset = pixel_number * 1 * num_samples; 170 else 171 pixel_offset = (pixel_number * 3 * num_samples) + 64; 172 break; 173 } 174 element_offset = pixel_offset + (sample_number * element_bytes); 175 } else { 176 GLint sample_offset; 177 178 switch (element_bytes) { 179 case 1: 180 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 181 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] 182 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] 183 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] 184 pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0] 185 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 186 break; 187 case 2: 188 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 189 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] 190 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] 191 pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0] 192 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] 193 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 194 break; 195 case 4: 196 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 197 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] 198 pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0] 199 pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2] 200 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] 201 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 202 break; 203 } 204 sample_offset = sample_number * (tile_bytes / num_samples); 205 element_offset = sample_offset + (pixel_number * element_bytes); 206 } 207 offset = slice_offset + tile_offset + element_offset; 208 return offset; 209} 210 211/* depth buffers */ 212static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb, 213 GLint x, GLint y) 214{ 215 GLubyte *ptr = rrb->bo->ptr; 216 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0); 217 return &ptr[offset]; 218} 219 220static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, 221 GLint x, GLint y) 222{ 223 GLubyte *ptr = rrb->bo->ptr; 224 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); 225 return &ptr[offset]; 226} 227 228static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, 229 GLint x, GLint y) 230{ 231 GLubyte *ptr = rrb->bo->ptr; 232 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; 233 GLint offset; 234 235 if (rrb->has_surface || !(rrb->bo->flags & mask)) { 236 offset = x * rrb->cpp + y * rrb->pitch; 237 } else { 238 offset = r600_1d_tile_helper(rrb, x, y, 0, 0); 239 } 240 return &ptr[offset]; 241} 242 243#endif 244 245/* radeon tiling on r300-r500 has 4 states, 246 macro-linear/micro-linear 247 macro-linear/micro-tiled 248 macro-tiled /micro-linear 249 macro-tiled /micro-tiled 250 1 byte surface 251 2 byte surface - two types - we only provide 8x2 microtiling 252 4 byte surface 253 8/16 byte (unused) 254*/ 255static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, 256 GLint x, GLint y) 257{ 258 GLubyte *ptr = rrb->bo->ptr; 259 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; 260 GLint offset; 261 262 if (rrb->has_surface || !(rrb->bo->flags & mask)) { 263 offset = x * rrb->cpp + y * rrb->pitch; 264 } else { 265 offset = 0; 266 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { 267 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { 268 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11; 269 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10; 270 offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9; 271 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 8; 272 offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7; 273 offset += ((y >> 1) & 0x1) << 6; 274 offset += ((x >> 2) & 0x1) << 5; 275 offset += (y & 1) << 4; 276 offset += (x & 3) << 2; 277 } else { 278 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11; 279 offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10; 280 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9; 281 offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8; 282 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7; 283 offset += (y & 1) << 6; 284 offset += (x & 15) << 2; 285 } 286 } else { 287 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5; 288 offset += (y & 1) << 4; 289 offset += (x & 3) << 2; 290 } 291 } 292 return &ptr[offset]; 293} 294 295static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, 296 GLint x, GLint y) 297{ 298 GLubyte *ptr = rrb->bo->ptr; 299 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; 300 GLint offset; 301 302 if (rrb->has_surface || !(rrb->bo->flags & mask)) { 303 offset = x * rrb->cpp + y * rrb->pitch; 304 } else { 305 offset = 0; 306 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { 307 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { 308 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11; 309 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10; 310 offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9; 311 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8; 312 offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7; 313 offset += ((y >> 1) & 0x1) << 6; 314 offset += ((x >> 3) & 0x1) << 5; 315 offset += (y & 1) << 4; 316 offset += (x & 3) << 2; 317 } else { 318 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11; 319 offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10; 320 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9; 321 offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8; 322 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7; 323 offset += (y & 1) << 6; 324 offset += ((x >> 4) & 0x1) << 5; 325 offset += (x & 15) << 2; 326 } 327 } else { 328 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 3)) << 5; 329 offset += (y & 0x1) << 4; 330 offset += (x & 0x7) << 1; 331 } 332 } 333 return &ptr[offset]; 334} 335 336#ifndef COMPILE_R300 337static uint32_t 338z24s8_to_s8z24(uint32_t val) 339{ 340 return (val << 24) | (val >> 8); 341} 342 343static uint32_t 344s8z24_to_z24s8(uint32_t val) 345{ 346 return (val >> 24) | (val << 8); 347} 348#endif 349 350/* 351 * Note that all information needed to access pixels in a renderbuffer 352 * should be obtained through the gl_renderbuffer parameter, not per-context 353 * information. 354 */ 355#define LOCAL_VARS \ 356 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ 357 struct radeon_renderbuffer *rrb = (void *) rb; \ 358 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ 359 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ 360 unsigned int num_cliprects; \ 361 struct drm_clip_rect *cliprects; \ 362 int x_off, y_off; \ 363 GLuint p; \ 364 (void)p; \ 365 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); 366 367#define LOCAL_DEPTH_VARS \ 368 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ 369 struct radeon_renderbuffer *rrb = (void *) rb; \ 370 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ 371 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ 372 unsigned int num_cliprects; \ 373 struct drm_clip_rect *cliprects; \ 374 int x_off, y_off; \ 375 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); 376 377#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 378 379#define Y_FLIP(_y) ((_y) * yScale + yBias) 380 381#define HW_LOCK() 382 383#define HW_UNLOCK() 384 385/* XXX FBO: this is identical to the macro in spantmp2.h except we get 386 * the cliprect info from the context, not the driDrawable. 387 * Move this into spantmp2.h someday. 388 */ 389#define HW_CLIPLOOP() \ 390 do { \ 391 int _nc = num_cliprects; \ 392 while ( _nc-- ) { \ 393 int minx = cliprects[_nc].x1 - x_off; \ 394 int miny = cliprects[_nc].y1 - y_off; \ 395 int maxx = cliprects[_nc].x2 - x_off; \ 396 int maxy = cliprects[_nc].y2 - y_off; 397 398/* ================================================================ 399 * Color buffer 400 */ 401 402/* 16 bit, RGB565 color spanline and pixel functions 403 */ 404#define SPANTMP_PIXEL_FMT GL_RGB 405#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 406 407#define TAG(x) radeon##x##_RGB565 408#define TAG2(x,y) radeon##x##_RGB565##y 409#if defined(RADEON_COMMON_FOR_R600) 410#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) 411#else 412#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 413#endif 414#include "spantmp2.h" 415 416/* 16 bit, ARGB1555 color spanline and pixel functions 417 */ 418#define SPANTMP_PIXEL_FMT GL_BGRA 419#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV 420 421#define TAG(x) radeon##x##_ARGB1555 422#define TAG2(x,y) radeon##x##_ARGB1555##y 423#if defined(RADEON_COMMON_FOR_R600) 424#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) 425#else 426#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 427#endif 428#include "spantmp2.h" 429 430/* 16 bit, RGBA4 color spanline and pixel functions 431 */ 432#define SPANTMP_PIXEL_FMT GL_BGRA 433#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV 434 435#define TAG(x) radeon##x##_ARGB4444 436#define TAG2(x,y) radeon##x##_ARGB4444##y 437#if defined(RADEON_COMMON_FOR_R600) 438#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) 439#else 440#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 441#endif 442#include "spantmp2.h" 443 444/* 32 bit, xRGB8888 color spanline and pixel functions 445 */ 446#define SPANTMP_PIXEL_FMT GL_BGRA 447#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV 448 449#define TAG(x) radeon##x##_xRGB8888 450#define TAG2(x,y) radeon##x##_xRGB8888##y 451#if defined(RADEON_COMMON_FOR_R600) 452#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000)) 453#define PUT_VALUE(_x, _y, d) { \ 454 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ 455 *_ptr = d; \ 456} while (0) 457#else 458#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000)) 459#define PUT_VALUE(_x, _y, d) { \ 460 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 461 *_ptr = d; \ 462} while (0) 463#endif 464#include "spantmp2.h" 465 466/* 32 bit, ARGB8888 color spanline and pixel functions 467 */ 468#define SPANTMP_PIXEL_FMT GL_BGRA 469#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV 470 471#define TAG(x) radeon##x##_ARGB8888 472#define TAG2(x,y) radeon##x##_ARGB8888##y 473#if defined(RADEON_COMMON_FOR_R600) 474#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) 475#define PUT_VALUE(_x, _y, d) { \ 476 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ 477 *_ptr = d; \ 478} while (0) 479#else 480#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) 481#define PUT_VALUE(_x, _y, d) { \ 482 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 483 *_ptr = d; \ 484} while (0) 485#endif 486#include "spantmp2.h" 487 488/* ================================================================ 489 * Depth buffer 490 */ 491 492/* The Radeon family has depth tiling on all the time, so we have to convert 493 * the x,y coordinates into the memory bus address (mba) in the same 494 * manner as the engine. In each case, the linear block address (ba) 495 * is calculated, and then wired with x and y to produce the final 496 * memory address. 497 * The chip will do address translation on its own if the surface registers 498 * are set up correctly. It is not quite enough to get it working with hyperz 499 * too... 500 */ 501 502/* 16-bit depth buffer functions 503 */ 504#define VALUE_TYPE GLushort 505 506#if defined(RADEON_COMMON_FOR_R200) 507#define WRITE_DEPTH( _x, _y, d ) \ 508 *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d 509#elif defined(RADEON_COMMON_FOR_R600) 510#define WRITE_DEPTH( _x, _y, d ) \ 511 *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d 512#else 513#define WRITE_DEPTH( _x, _y, d ) \ 514 *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d 515#endif 516 517#if defined(RADEON_COMMON_FOR_R200) 518#define READ_DEPTH( d, _x, _y ) \ 519 d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) 520#elif defined(RADEON_COMMON_FOR_R600) 521#define READ_DEPTH( d, _x, _y ) \ 522 d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) 523#else 524#define READ_DEPTH( d, _x, _y ) \ 525 d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) 526#endif 527 528#define TAG(x) radeon##x##_z16 529#include "depthtmp.h" 530 531/* 24 bit depth 532 * 533 * Careful: It looks like the R300 uses ZZZS byte order while the R200 534 * uses SZZZ for 24 bit depth, 8 bit stencil mode. 535 */ 536#define VALUE_TYPE GLuint 537 538#if defined(COMPILE_R300) 539#define WRITE_DEPTH( _x, _y, d ) \ 540do { \ 541 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 542 GLuint tmp = *_ptr; \ 543 tmp &= 0x000000ff; \ 544 tmp |= ((d << 8) & 0xffffff00); \ 545 *_ptr = tmp; \ 546} while (0) 547#elif defined(RADEON_COMMON_FOR_R600) 548#define WRITE_DEPTH( _x, _y, d ) \ 549do { \ 550 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ 551 GLuint tmp = *_ptr; \ 552 tmp &= 0xff000000; \ 553 tmp |= ((d) & 0x00ffffff); \ 554 *_ptr = tmp; \ 555} while (0) 556#elif defined(RADEON_COMMON_FOR_R200) 557#define WRITE_DEPTH( _x, _y, d ) \ 558do { \ 559 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ 560 GLuint tmp = *_ptr; \ 561 tmp &= 0xff000000; \ 562 tmp |= ((d) & 0x00ffffff); \ 563 *_ptr = tmp; \ 564} while (0) 565#else 566#define WRITE_DEPTH( _x, _y, d ) \ 567do { \ 568 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 569 GLuint tmp = *_ptr; \ 570 tmp &= 0xff000000; \ 571 tmp |= ((d) & 0x00ffffff); \ 572 *_ptr = tmp; \ 573} while (0) 574#endif 575 576#if defined(COMPILE_R300) 577#define READ_DEPTH( d, _x, _y ) \ 578 do { \ 579 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ 580 }while(0) 581#elif defined(RADEON_COMMON_FOR_R600) 582#define READ_DEPTH( d, _x, _y ) \ 583 do { \ 584 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \ 585 }while(0) 586#elif defined(RADEON_COMMON_FOR_R200) 587#define READ_DEPTH( d, _x, _y ) \ 588 do { \ 589 d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \ 590 }while(0) 591#else 592#define READ_DEPTH( d, _x, _y ) \ 593 d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; 594#endif 595 596#define TAG(x) radeon##x##_z24 597#include "depthtmp.h" 598 599/* 24 bit depth, 8 bit stencil depthbuffer functions 600 * EXT_depth_stencil 601 * 602 * Careful: It looks like the R300 uses ZZZS byte order while the R200 603 * uses SZZZ for 24 bit depth, 8 bit stencil mode. 604 */ 605#define VALUE_TYPE GLuint 606 607#if defined(COMPILE_R300) 608#define WRITE_DEPTH( _x, _y, d ) \ 609do { \ 610 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 611 *_ptr = d; \ 612} while (0) 613#elif defined(RADEON_COMMON_FOR_R600) 614#define WRITE_DEPTH( _x, _y, d ) \ 615do { \ 616 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ 617 GLuint tmp = *_ptr; \ 618 tmp &= 0xff000000; \ 619 tmp |= (((d) >> 8) & 0x00ffffff); \ 620 *_ptr = tmp; \ 621 _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ 622 tmp = *_ptr; \ 623 tmp &= 0xffffff00; \ 624 tmp |= (d) & 0xff; \ 625 *_ptr = tmp; \ 626} while (0) 627#elif defined(RADEON_COMMON_FOR_R200) 628#define WRITE_DEPTH( _x, _y, d ) \ 629do { \ 630 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ 631 GLuint tmp = z24s8_to_s8z24(d); \ 632 *_ptr = tmp; \ 633} while (0) 634#else 635#define WRITE_DEPTH( _x, _y, d ) \ 636do { \ 637 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 638 GLuint tmp = z24s8_to_s8z24(d); \ 639 *_ptr = tmp; \ 640} while (0) 641#endif 642 643#if defined(COMPILE_R300) 644#define READ_DEPTH( d, _x, _y ) \ 645 do { \ 646 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ 647 }while(0) 648#elif defined(RADEON_COMMON_FOR_R600) 649#define READ_DEPTH( d, _x, _y ) \ 650 do { \ 651 d = ((*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) << 8) & 0xffffff00; \ 652 d |= (*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) & 0x000000ff; \ 653 }while(0) 654#elif defined(RADEON_COMMON_FOR_R200) 655#define READ_DEPTH( d, _x, _y ) \ 656 do { \ 657 d = s8z24_to_z24s8(*(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off))); \ 658 }while(0) 659#else 660#define READ_DEPTH( d, _x, _y ) do { \ 661 d = s8z24_to_z24s8(*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off ))); \ 662 } while (0) 663#endif 664 665#define TAG(x) radeon##x##_z24_s8 666#include "depthtmp.h" 667 668/* ================================================================ 669 * Stencil buffer 670 */ 671 672/* 24 bit depth, 8 bit stencil depthbuffer functions 673 */ 674#ifdef COMPILE_R300 675#define WRITE_STENCIL( _x, _y, d ) \ 676do { \ 677 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ 678 GLuint tmp = *_ptr; \ 679 tmp &= 0xffffff00; \ 680 tmp |= (d) & 0xff; \ 681 *_ptr = tmp; \ 682} while (0) 683#elif defined(RADEON_COMMON_FOR_R600) 684#define WRITE_STENCIL( _x, _y, d ) \ 685do { \ 686 GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ 687 GLuint tmp = *_ptr; \ 688 tmp &= 0xffffff00; \ 689 tmp |= (d) & 0xff; \ 690 *_ptr = tmp; \ 691} while (0) 692#elif defined(RADEON_COMMON_FOR_R200) 693#define WRITE_STENCIL( _x, _y, d ) \ 694do { \ 695 GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \ 696 GLuint tmp = *_ptr; \ 697 tmp &= 0x00ffffff; \ 698 tmp |= (((d) & 0xff) << 24); \ 699 *_ptr = tmp; \ 700} while (0) 701#else 702#define WRITE_STENCIL( _x, _y, d ) \ 703do { \ 704 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ 705 GLuint tmp = *_ptr; \ 706 tmp &= 0x00ffffff; \ 707 tmp |= (((d) & 0xff) << 24); \ 708 *_ptr = tmp; \ 709} while (0) 710#endif 711 712#ifdef COMPILE_R300 713#define READ_STENCIL( d, _x, _y ) \ 714do { \ 715 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 716 GLuint tmp = *_ptr; \ 717 d = tmp & 0x000000ff; \ 718} while (0) 719#elif defined(RADEON_COMMON_FOR_R600) 720#define READ_STENCIL( d, _x, _y ) \ 721do { \ 722 GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \ 723 GLuint tmp = *_ptr; \ 724 d = tmp & 0x000000ff; \ 725} while (0) 726#elif defined(RADEON_COMMON_FOR_R200) 727#define READ_STENCIL( d, _x, _y ) \ 728do { \ 729 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ 730 GLuint tmp = *_ptr; \ 731 d = (tmp & 0xff000000) >> 24; \ 732} while (0) 733#else 734#define READ_STENCIL( d, _x, _y ) \ 735do { \ 736 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 737 GLuint tmp = *_ptr; \ 738 d = (tmp & 0xff000000) >> 24; \ 739} while (0) 740#endif 741 742#define TAG(x) radeon##x##_z24_s8 743#include "stenciltmp.h" 744 745 746static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) 747{ 748 struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); 749 int r; 750 751 if (rrb == NULL || !rrb->bo) 752 return; 753 754 if (flag) { 755 if (rrb->bo->bom->funcs->bo_wait) 756 radeon_bo_wait(rrb->bo); 757 r = radeon_bo_map(rrb->bo, 1); 758 if (r) { 759 fprintf(stderr, "(%s) error(%d) mapping buffer.\n", 760 __FUNCTION__, r); 761 } 762 763 radeonSetSpanFunctions(rrb); 764 } else { 765 radeon_bo_unmap(rrb->bo); 766 rb->GetRow = NULL; 767 rb->PutRow = NULL; 768 } 769} 770 771static void 772radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map) 773{ 774 GLuint i, j; 775 776 /* color draw buffers */ 777 for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) 778 map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map); 779 780 /* check for render to textures */ 781 for (i = 0; i < BUFFER_COUNT; i++) { 782 struct gl_renderbuffer_attachment *att = 783 ctx->DrawBuffer->Attachment + i; 784 struct gl_texture_object *tex = att->Texture; 785 if (tex) { 786 /* Render to texture. Note that a mipmapped texture need not 787 * be complete for render to texture, so we must restrict to 788 * mapping only the attached image. 789 */ 790 radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]); 791 ASSERT(att->Renderbuffer); 792 793 if (map) 794 radeon_teximage_map(image, GL_TRUE); 795 else 796 radeon_teximage_unmap(image); 797 } 798 } 799 800 map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map); 801 802 /* depth buffer (Note wrapper!) */ 803 if (ctx->DrawBuffer->_DepthBuffer) 804 map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map); 805 806 if (ctx->DrawBuffer->_StencilBuffer) 807 map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map); 808} 809 810static void radeonSpanRenderStart(GLcontext * ctx) 811{ 812 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 813 int i; 814 815 radeon_firevertices(rmesa); 816 817 /* The locking and wait for idle should really only be needed in classic mode. 818 * In a future memory manager based implementation, this should become 819 * unnecessary due to the fact that mapping our buffers, textures, etc. 820 * should implicitly wait for any previous rendering commands that must 821 * be waited on. */ 822 if (!rmesa->radeonScreen->driScreen->dri2.enabled) { 823 LOCK_HARDWARE(rmesa); 824 radeonWaitForIdleLocked(rmesa); 825 } 826 827 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 828 if (ctx->Texture.Unit[i]._ReallyEnabled) 829 ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current); 830 } 831 832 radeon_map_unmap_buffers(ctx, 1); 833} 834 835static void radeonSpanRenderFinish(GLcontext * ctx) 836{ 837 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 838 int i; 839 _swrast_flush(ctx); 840 if (!rmesa->radeonScreen->driScreen->dri2.enabled) { 841 UNLOCK_HARDWARE(rmesa); 842 } 843 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 844 if (ctx->Texture.Unit[i]._ReallyEnabled) 845 ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current); 846 } 847 848 radeon_map_unmap_buffers(ctx, 0); 849} 850 851void radeonInitSpanFuncs(GLcontext * ctx) 852{ 853 struct swrast_device_driver *swdd = 854 _swrast_GetDeviceDriverReference(ctx); 855 swdd->SpanRenderStart = radeonSpanRenderStart; 856 swdd->SpanRenderFinish = radeonSpanRenderFinish; 857} 858 859/** 860 * Plug in the Get/Put routines for the given driRenderbuffer. 861 */ 862static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) 863{ 864 if (rrb->base._ActualFormat == GL_RGB5) { 865 radeonInitPointers_RGB565(&rrb->base); 866 } else if (rrb->base._ActualFormat == GL_RGB8) { 867 radeonInitPointers_xRGB8888(&rrb->base); 868 } else if (rrb->base._ActualFormat == GL_RGBA8) { 869 radeonInitPointers_ARGB8888(&rrb->base); 870 } else if (rrb->base._ActualFormat == GL_RGBA4) { 871 radeonInitPointers_ARGB4444(&rrb->base); 872 } else if (rrb->base._ActualFormat == GL_RGB5_A1) { 873 radeonInitPointers_ARGB1555(&rrb->base); 874 } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) { 875 radeonInitDepthPointers_z16(&rrb->base); 876 } else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) { 877 radeonInitDepthPointers_z24(&rrb->base); 878 } else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) { 879 radeonInitDepthPointers_z24_s8(&rrb->base); 880 } else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) { 881 radeonInitStencilPointers_z24_s8(&rrb->base); 882 } else { 883 fprintf(stderr, "radeonSetSpanFunctions: bad actual format: 0x%04X\n", rrb->base._ActualFormat); 884 } 885} 886