radeon_span.c revision 22627654256ee09bfd659624568865a79eb725b6
/**************************************************************************

Copyright (C) The Weather Channel, Inc. 2002. All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E.
Martin <martin@valinux.com> 38 * Gareth Hughes <gareth@valinux.com> 39 * Keith Whitwell <keith@tungstengraphics.com> 40 * 41 */ 42 43#include "main/glheader.h" 44#include "main/texformat.h" 45#include "swrast/swrast.h" 46 47#include "radeon_common.h" 48#include "radeon_lock.h" 49#include "radeon_span.h" 50 51#define DBG 0 52 53static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb); 54 55 56/* r200 depth buffer is always tiled - this is the formula 57 according to the docs unless I typo'ed in it 58*/ 59#if defined(RADEON_R200) 60static GLubyte *r200_depth_2byte(const struct radeon_renderbuffer * rrb, 61 GLint x, GLint y) 62{ 63 GLubyte *ptr = rrb->bo->ptr; 64 GLint offset; 65 if (rrb->has_surface) { 66 offset = x * rrb->cpp + y * rrb->pitch; 67 } else { 68 GLuint b; 69 offset = 0; 70 b = (((y >> 4) * (rrb->pitch >> 8) + (x >> 6))); 71 offset += (b >> 1) << 12; 72 offset += (((rrb->pitch >> 8) & 0x1) ? (b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; 73 offset += ((y >> 2) & 0x3) << 9; 74 offset += ((x >> 3) & 0x1) << 8; 75 offset += ((x >> 4) & 0x3) << 6; 76 offset += ((x >> 2) & 0x1) << 5; 77 offset += ((y >> 1) & 0x1) << 4; 78 offset += ((x >> 1) & 0x1) << 3; 79 offset += (y & 0x1) << 2; 80 offset += (x & 0x1) << 1; 81 } 82 return &ptr[offset]; 83} 84 85static GLubyte *r200_depth_4byte(const struct radeon_renderbuffer * rrb, 86 GLint x, GLint y) 87{ 88 GLubyte *ptr = rrb->bo->ptr; 89 GLint offset; 90 if (rrb->has_surface) { 91 offset = x * rrb->cpp + y * rrb->pitch; 92 } else { 93 GLuint b; 94 offset = 0; 95 b = (((y & 0x7ff) >> 4) * (rrb->pitch >> 7) + (x >> 5)); 96 offset += (b >> 1) << 12; 97 offset += (((rrb->pitch >> 7) & 0x1) ? 
(b & 0x1) : ((b & 0x1) ^ ((y >> 4) & 0x1))) << 11; 98 offset += ((y >> 2) & 0x3) << 9; 99 offset += ((x >> 2) & 0x1) << 8; 100 offset += ((x >> 3) & 0x3) << 6; 101 offset += ((y >> 1) & 0x1) << 5; 102 offset += ((x >> 1) & 0x1) << 4; 103 offset += (y & 0x1) << 3; 104 offset += (x & 0x1) << 2; 105 } 106 return &ptr[offset]; 107} 108#endif 109 110/* r600 tiling 111 * two main types: 112 * - 1D (akin to macro-linear/micro-tiled on older asics) 113 * - 2D (akin to macro-tiled/micro-tiled on older asics) 114 * only 1D tiling is implemented below 115 */ 116#if defined(RADEON_R600) 117static inline GLint r600_1d_tile_helper(const struct radeon_renderbuffer * rrb, 118 GLint x, GLint y, GLint is_depth, GLint is_stencil) 119{ 120 GLint element_bytes = rrb->cpp; 121 GLint num_samples = 1; 122 GLint tile_width = 8; 123 GLint tile_height = 8; 124 GLint tile_thickness = 1; 125 GLint pitch_elements = rrb->pitch / element_bytes; 126 GLint height = rrb->base.Height; 127 GLint z = 0; 128 GLint sample_number = 0; 129 /* */ 130 GLint tile_bytes; 131 GLint tiles_per_row; 132 GLint tiles_per_slice; 133 GLint slice_offset; 134 GLint tile_row_index; 135 GLint tile_column_index; 136 GLint tile_offset; 137 GLint pixel_number = 0; 138 GLint element_offset; 139 GLint offset = 0; 140 141 tile_bytes = tile_width * tile_height * tile_thickness * element_bytes * num_samples; 142 tiles_per_row = pitch_elements / tile_width; 143 tiles_per_slice = tiles_per_row * (height / tile_height); 144 slice_offset = (z / tile_thickness) * tiles_per_slice * tile_bytes; 145 tile_row_index = y / tile_height; 146 tile_column_index = x / tile_width; 147 tile_offset = ((tile_row_index * tiles_per_row) + tile_column_index) * tile_bytes; 148 149 if (is_depth) { 150 GLint pixel_offset = 0; 151 152 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 153 pixel_number |= ((y >> 0) & 1) << 1; // pn[1] = y[0] 154 pixel_number |= ((x >> 1) & 1) << 2; // pn[2] = x[1] 155 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] 
156 pixel_number |= ((x >> 2) & 1) << 4; // pn[4] = x[2] 157 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 158 switch (element_bytes) { 159 case 2: 160 pixel_offset = pixel_number * element_bytes * num_samples; 161 break; 162 case 4: 163 /* stencil and depth data are stored separately within a tile. 164 * stencil is stored in a contiguous tile before the depth tile. 165 * stencil element is 1 byte, depth element is 3 bytes. 166 * stencil tile is 64 bytes. 167 */ 168 if (is_stencil) 169 pixel_offset = pixel_number * 1 * num_samples; 170 else 171 pixel_offset = (pixel_number * 3 * num_samples) + 64; 172 break; 173 } 174 element_offset = pixel_offset + (sample_number * element_bytes); 175 } else { 176 GLint sample_offset; 177 178 switch (element_bytes) { 179 case 1: 180 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 181 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] 182 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] 183 pixel_number |= ((y >> 1) & 1) << 3; // pn[3] = y[1] 184 pixel_number |= ((y >> 0) & 1) << 4; // pn[4] = y[0] 185 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 186 break; 187 case 2: 188 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 189 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] 190 pixel_number |= ((x >> 2) & 1) << 2; // pn[2] = x[2] 191 pixel_number |= ((y >> 0) & 1) << 3; // pn[3] = y[0] 192 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] 193 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 194 break; 195 case 4: 196 pixel_number |= ((x >> 0) & 1) << 0; // pn[0] = x[0] 197 pixel_number |= ((x >> 1) & 1) << 1; // pn[1] = x[1] 198 pixel_number |= ((y >> 0) & 1) << 2; // pn[2] = y[0] 199 pixel_number |= ((x >> 2) & 1) << 3; // pn[3] = x[2] 200 pixel_number |= ((y >> 1) & 1) << 4; // pn[4] = y[1] 201 pixel_number |= ((y >> 2) & 1) << 5; // pn[5] = y[2] 202 break; 203 } 204 sample_offset = sample_number * (tile_bytes / num_samples); 205 element_offset = sample_offset + (pixel_number * 
element_bytes); 206 } 207 offset = slice_offset + tile_offset + element_offset; 208 return offset; 209} 210 211/* depth buffers */ 212static GLubyte *r600_ptr_depth(const struct radeon_renderbuffer * rrb, 213 GLint x, GLint y) 214{ 215 GLubyte *ptr = rrb->bo->ptr; 216 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 0); 217 return &ptr[offset]; 218} 219 220static GLubyte *r600_ptr_stencil(const struct radeon_renderbuffer * rrb, 221 GLint x, GLint y) 222{ 223 GLubyte *ptr = rrb->bo->ptr; 224 GLint offset = r600_1d_tile_helper(rrb, x, y, 1, 1); 225 return &ptr[offset]; 226} 227 228static GLubyte *r600_ptr_color(const struct radeon_renderbuffer * rrb, 229 GLint x, GLint y) 230{ 231 GLubyte *ptr = rrb->bo->ptr; 232 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; 233 GLint offset; 234 235 if (rrb->has_surface || !(rrb->bo->flags & mask)) { 236 offset = x * rrb->cpp + y * rrb->pitch; 237 } else { 238 offset = r600_1d_tile_helper(rrb, x, y, 0, 0); 239 } 240 return &ptr[offset]; 241} 242 243#else 244 245/* radeon tiling on r300-r500 has 4 states, 246 macro-linear/micro-linear 247 macro-linear/micro-tiled 248 macro-tiled /micro-linear 249 macro-tiled /micro-tiled 250 1 byte surface 251 2 byte surface - two types - we only provide 8x2 microtiling 252 4 byte surface 253 8/16 byte (unused) 254*/ 255static GLubyte *radeon_ptr_4byte(const struct radeon_renderbuffer * rrb, 256 GLint x, GLint y) 257{ 258 GLubyte *ptr = rrb->bo->ptr; 259 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; 260 GLint offset; 261 262 if (rrb->has_surface || !(rrb->bo->flags & mask)) { 263 offset = x * rrb->cpp + y * rrb->pitch; 264 } else { 265 offset = 0; 266 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { 267 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { 268 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 5)) << 11; 269 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 10; 270 offset += (((y >> 4) ^ (x >> 4)) & 0x1) << 9; 271 offset += (((y >> 2) ^ (x >> 
4)) & 0x1) << 8; 272 offset += (((y >> 3) ^ (x >> 3)) & 0x1) << 7; 273 offset += ((y >> 1) & 0x1) << 6; 274 offset += ((x >> 2) & 0x1) << 5; 275 offset += (y & 1) << 4; 276 offset += (x & 3) << 2; 277 } else { 278 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 6)) << 11; 279 offset += (((y >> 2) ^ (x >> 6)) & 0x1) << 10; 280 offset += (((y >> 3) ^ (x >> 5)) & 0x1) << 9; 281 offset += (((y >> 1) ^ (x >> 5)) & 0x1) << 8; 282 offset += (((y >> 2) ^ (x >> 4)) & 0x1) << 7; 283 offset += (y & 1) << 6; 284 offset += (x & 15) << 2; 285 } 286 } else { 287 offset = ((y >> 1) * (rrb->pitch >> 4) + (x >> 2)) << 5; 288 offset += (y & 1) << 4; 289 offset += (x & 3) << 2; 290 } 291 } 292 return &ptr[offset]; 293} 294 295static GLubyte *radeon_ptr_2byte_8x2(const struct radeon_renderbuffer * rrb, 296 GLint x, GLint y) 297{ 298 GLubyte *ptr = rrb->bo->ptr; 299 uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE; 300 GLint offset; 301 302 if (rrb->has_surface || !(rrb->bo->flags & mask)) { 303 offset = x * rrb->cpp + y * rrb->pitch; 304 } else { 305 offset = 0; 306 if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) { 307 if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) { 308 offset = ((y >> 4) * (rrb->pitch >> 7) + (x >> 6)) << 11; 309 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 10; 310 offset += (((y >> 4) ^ (x >> 5)) & 0x1) << 9; 311 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 8; 312 offset += (((y >> 3) ^ (x >> 4)) & 0x1) << 7; 313 offset += ((y >> 1) & 0x1) << 6; 314 offset += ((x >> 3) & 0x1) << 5; 315 offset += (y & 1) << 4; 316 offset += (x & 3) << 2; 317 } else { 318 offset = ((y >> 3) * (rrb->pitch >> 8) + (x >> 7)) << 11; 319 offset += (((y >> 2) ^ (x >> 7)) & 0x1) << 10; 320 offset += (((y >> 3) ^ (x >> 6)) & 0x1) << 9; 321 offset += (((y >> 1) ^ (x >> 6)) & 0x1) << 8; 322 offset += (((y >> 2) ^ (x >> 5)) & 0x1) << 7; 323 offset += (y & 1) << 6; 324 offset += ((x >> 4) & 0x1) << 5; 325 offset += (x & 15) << 2; 326 } 327 } else { 328 offset = ((y >> 1) * 
(rrb->pitch >> 4) + (x >> 3)) << 5; 329 offset += (y & 0x1) << 4; 330 offset += (x & 0x7) << 1; 331 } 332 } 333 return &ptr[offset]; 334} 335 336#endif 337 338/* 339 * Note that all information needed to access pixels in a renderbuffer 340 * should be obtained through the gl_renderbuffer parameter, not per-context 341 * information. 342 */ 343#define LOCAL_VARS \ 344 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ 345 struct radeon_renderbuffer *rrb = (void *) rb; \ 346 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ 347 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ 348 unsigned int num_cliprects; \ 349 struct drm_clip_rect *cliprects; \ 350 int x_off, y_off; \ 351 GLuint p; \ 352 (void)p; \ 353 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); 354 355#define LOCAL_DEPTH_VARS \ 356 struct radeon_context *radeon = RADEON_CONTEXT(ctx); \ 357 struct radeon_renderbuffer *rrb = (void *) rb; \ 358 const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1; \ 359 const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\ 360 unsigned int num_cliprects; \ 361 struct drm_clip_rect *cliprects; \ 362 int x_off, y_off; \ 363 radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off); 364 365#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS 366 367#define Y_FLIP(_y) ((_y) * yScale + yBias) 368 369#define HW_LOCK() 370 371#define HW_UNLOCK() 372 373/* XXX FBO: this is identical to the macro in spantmp2.h except we get 374 * the cliprect info from the context, not the driDrawable. 375 * Move this into spantmp2.h someday. 
376 */ 377#define HW_CLIPLOOP() \ 378 do { \ 379 int _nc = num_cliprects; \ 380 while ( _nc-- ) { \ 381 int minx = cliprects[_nc].x1 - x_off; \ 382 int miny = cliprects[_nc].y1 - y_off; \ 383 int maxx = cliprects[_nc].x2 - x_off; \ 384 int maxy = cliprects[_nc].y2 - y_off; 385 386/* ================================================================ 387 * Color buffer 388 */ 389 390/* 16 bit, RGB565 color spanline and pixel functions 391 */ 392#define SPANTMP_PIXEL_FMT GL_RGB 393#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5 394 395#define TAG(x) radeon##x##_RGB565 396#define TAG2(x,y) radeon##x##_RGB565##y 397#if defined(RADEON_R600) 398#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) 399#else 400#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 401#endif 402#include "spantmp2.h" 403 404#define SPANTMP_PIXEL_FMT GL_RGB 405#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5_REV 406 407#define TAG(x) radeon##x##_RGB565_REV 408#define TAG2(x,y) radeon##x##_RGB565_REV##y 409#if defined(RADEON_R600) 410#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) 411#else 412#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 413#endif 414#include "spantmp2.h" 415 416/* 16 bit, ARGB1555 color spanline and pixel functions 417 */ 418#define SPANTMP_PIXEL_FMT GL_BGRA 419#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5_REV 420 421#define TAG(x) radeon##x##_ARGB1555 422#define TAG2(x,y) radeon##x##_ARGB1555##y 423#if defined(RADEON_R600) 424#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) 425#else 426#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 427#endif 428#include "spantmp2.h" 429 430#define SPANTMP_PIXEL_FMT GL_BGRA 431#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_1_5_5_5 432 433#define TAG(x) radeon##x##_ARGB1555_REV 434#define TAG2(x,y) radeon##x##_ARGB1555_REV##y 435#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 436#include 
"spantmp2.h" 437 438/* 16 bit, RGBA4 color spanline and pixel functions 439 */ 440#define SPANTMP_PIXEL_FMT GL_BGRA 441#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4_REV 442 443#define TAG(x) radeon##x##_ARGB4444 444#define TAG2(x,y) radeon##x##_ARGB4444##y 445#if defined(RADEON_R600) 446#define GET_PTR(X,Y) r600_ptr_color(rrb, (X) + x_off, (Y) + y_off) 447#else 448#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 449#endif 450#include "spantmp2.h" 451 452#define SPANTMP_PIXEL_FMT GL_BGRA 453#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_4_4_4_4 454 455#define TAG(x) radeon##x##_ARGB4444_REV 456#define TAG2(x,y) radeon##x##_ARGB4444_REV##y 457#define GET_PTR(X,Y) radeon_ptr_2byte_8x2(rrb, (X) + x_off, (Y) + y_off) 458#include "spantmp2.h" 459 460/* 32 bit, xRGB8888 color spanline and pixel functions 461 */ 462#define SPANTMP_PIXEL_FMT GL_BGRA 463#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV 464 465#define TAG(x) radeon##x##_xRGB8888 466#define TAG2(x,y) radeon##x##_xRGB8888##y 467#if defined(RADEON_R600) 468#define GET_VALUE(_x, _y) ((*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off)) | 0xff000000)) 469#define PUT_VALUE(_x, _y, d) { \ 470 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ 471 *_ptr = d; \ 472} while (0) 473#else 474#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0xff000000)) 475#define PUT_VALUE(_x, _y, d) { \ 476 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 477 *_ptr = d; \ 478} while (0) 479#endif 480#include "spantmp2.h" 481 482/* 32 bit, ARGB8888 color spanline and pixel functions 483 */ 484#define SPANTMP_PIXEL_FMT GL_BGRA 485#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV 486 487#define TAG(x) radeon##x##_ARGB8888 488#define TAG2(x,y) radeon##x##_ARGB8888##y 489#if defined(RADEON_R600) 490#define GET_VALUE(_x, _y) (*(GLuint*)(r600_ptr_color(rrb, _x + x_off, _y + y_off))) 491#define PUT_VALUE(_x, _y, d) { 
\ 492 GLuint *_ptr = (GLuint*)r600_ptr_color( rrb, _x + x_off, _y + y_off ); \ 493 *_ptr = d; \ 494} while (0) 495#else 496#define GET_VALUE(_x, _y) (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))) 497#define PUT_VALUE(_x, _y, d) { \ 498 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 499 *_ptr = d; \ 500} while (0) 501#endif 502#include "spantmp2.h" 503 504/* 32 bit, BGRx8888 color spanline and pixel functions 505 */ 506#define SPANTMP_PIXEL_FMT GL_BGRA 507#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8 508 509#define TAG(x) radeon##x##_BGRx8888 510#define TAG2(x,y) radeon##x##_BGRx8888##y 511#define GET_VALUE(_x, _y) ((*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) | 0x000000ff)) 512#define PUT_VALUE(_x, _y, d) { \ 513 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 514 *_ptr = d; \ 515} while (0) 516#include "spantmp2.h" 517 518/* 32 bit, BGRA8888 color spanline and pixel functions 519 */ 520#define SPANTMP_PIXEL_FMT GL_BGRA 521#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8 522 523#define TAG(x) radeon##x##_BGRA8888 524#define TAG2(x,y) radeon##x##_BGRA8888##y 525#define GET_PTR(X,Y) radeon_ptr_4byte(rrb, (X) + x_off, (Y) + y_off) 526#include "spantmp2.h" 527 528/* ================================================================ 529 * Depth buffer 530 */ 531 532/* The Radeon family has depth tiling on all the time, so we have to convert 533 * the x,y coordinates into the memory bus address (mba) in the same 534 * manner as the engine. In each case, the linear block address (ba) 535 * is calculated, and then wired with x and y to produce the final 536 * memory address. 537 * The chip will do address translation on its own if the surface registers 538 * are set up correctly. It is not quite enough to get it working with hyperz 539 * too... 
540 */ 541 542/* 16-bit depth buffer functions 543 */ 544#define VALUE_TYPE GLushort 545 546#if defined(RADEON_R200) 547#define WRITE_DEPTH( _x, _y, d ) \ 548 *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) = d 549#elif defined(RADEON_R600) 550#define WRITE_DEPTH( _x, _y, d ) \ 551 *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) = d 552#else 553#define WRITE_DEPTH( _x, _y, d ) \ 554 *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) = d 555#endif 556 557#if defined(RADEON_R200) 558#define READ_DEPTH( d, _x, _y ) \ 559 d = *(GLushort *)r200_depth_2byte(rrb, _x + x_off, _y + y_off) 560#elif defined(RADEON_R600) 561#define READ_DEPTH( d, _x, _y ) \ 562 d = *(GLushort *)r600_ptr_depth(rrb, _x + x_off, _y + y_off) 563#else 564#define READ_DEPTH( d, _x, _y ) \ 565 d = *(GLushort *)radeon_ptr_2byte_8x2(rrb, _x + x_off, _y + y_off) 566#endif 567 568#define TAG(x) radeon##x##_z16 569#include "depthtmp.h" 570 571/* 24 bit depth 572 * 573 * Careful: It looks like the R300 uses ZZZS byte order while the R200 574 * uses SZZZ for 24 bit depth, 8 bit stencil mode. 
575 */ 576#define VALUE_TYPE GLuint 577 578#if defined(RADEON_R300) 579#define WRITE_DEPTH( _x, _y, d ) \ 580do { \ 581 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 582 GLuint tmp = *_ptr; \ 583 tmp &= 0x000000ff; \ 584 tmp |= ((d << 8) & 0xffffff00); \ 585 *_ptr = tmp; \ 586} while (0) 587#elif defined(RADEON_R600) 588#define WRITE_DEPTH( _x, _y, d ) \ 589do { \ 590 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ 591 GLuint tmp = *_ptr; \ 592 tmp &= 0xff000000; \ 593 tmp |= ((d) & 0x00ffffff); \ 594 *_ptr = tmp; \ 595} while (0) 596#elif defined(RADEON_R200) 597#define WRITE_DEPTH( _x, _y, d ) \ 598do { \ 599 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ 600 GLuint tmp = *_ptr; \ 601 tmp &= 0xff000000; \ 602 tmp |= ((d) & 0x00ffffff); \ 603 *_ptr = tmp; \ 604} while (0) 605#else 606#define WRITE_DEPTH( _x, _y, d ) \ 607do { \ 608 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 609 GLuint tmp = *_ptr; \ 610 tmp &= 0xff000000; \ 611 tmp |= ((d) & 0x00ffffff); \ 612 *_ptr = tmp; \ 613} while (0) 614#endif 615 616#if defined(RADEON_R300) 617#define READ_DEPTH( d, _x, _y ) \ 618 do { \ 619 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \ 620 }while(0) 621#elif defined(RADEON_R600) 622#define READ_DEPTH( d, _x, _y ) \ 623 do { \ 624 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off)) & 0x00ffffff); \ 625 }while(0) 626#elif defined(RADEON_R200) 627#define READ_DEPTH( d, _x, _y ) \ 628 do { \ 629 d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; \ 630 }while(0) 631#else 632#define READ_DEPTH( d, _x, _y ) \ 633 d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off)) & 0x00ffffff; 634#endif 635 636#define TAG(x) radeon##x##_z24 637#include "depthtmp.h" 638 639/* 24 bit depth, 8 bit stencil depthbuffer functions 640 * EXT_depth_stencil 641 * 642 * Careful: It looks like the R300 uses ZZZS 
byte order while the R200 643 * uses SZZZ for 24 bit depth, 8 bit stencil mode. 644 */ 645#define VALUE_TYPE GLuint 646 647#if defined(RADEON_R300) 648#define WRITE_DEPTH( _x, _y, d ) \ 649do { \ 650 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 651 *_ptr = d; \ 652} while (0) 653#elif defined(RADEON_R600) 654#define WRITE_DEPTH( _x, _y, d ) \ 655do { \ 656 GLuint *_ptr = (GLuint*)r600_ptr_depth( rrb, _x + x_off, _y + y_off ); \ 657 GLuint tmp = *_ptr; \ 658 tmp &= 0xff000000; \ 659 tmp |= ((d) & 0x00ffffff); \ 660 *_ptr = tmp; \ 661 _ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ 662 tmp = *_ptr; \ 663 tmp &= 0xffffff00; \ 664 tmp |= ((d) >> 24) & 0xff; \ 665 *_ptr = tmp; \ 666} while (0) 667#elif defined(RADEON_R200) 668#define WRITE_DEPTH( _x, _y, d ) \ 669do { \ 670 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ 671 *_ptr = d; \ 672} while (0) 673#else 674#define WRITE_DEPTH( _x, _y, d ) \ 675do { \ 676 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 677 *_ptr = d; \ 678} while (0) 679#endif 680 681#if defined(RADEON_R300) 682#define READ_DEPTH( d, _x, _y ) \ 683 do { \ 684 d = (*(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off))); \ 685 }while(0) 686#elif defined(RADEON_R600) 687#define READ_DEPTH( d, _x, _y ) \ 688 do { \ 689 d = (*(GLuint*)(r600_ptr_depth(rrb, _x + x_off, _y + y_off))) & 0x00ffffff; \ 690 d |= ((*(GLuint*)(r600_ptr_stencil(rrb, _x + x_off, _y + y_off))) << 24) & 0xff000000; \ 691 }while(0) 692#elif defined(RADEON_R200) 693#define READ_DEPTH( d, _x, _y ) \ 694 do { \ 695 d = *(GLuint*)(r200_depth_4byte(rrb, _x + x_off, _y + y_off)); \ 696 }while(0) 697#else 698#define READ_DEPTH( d, _x, _y ) do { \ 699 d = *(GLuint*)(radeon_ptr_4byte(rrb, _x + x_off, _y + y_off )); \ 700 } while (0) 701#endif 702 703#define TAG(x) radeon##x##_s8_z24 704#include "depthtmp.h" 705 706/* ================================================================ 707 * 
Stencil buffer 708 */ 709 710/* 24 bit depth, 8 bit stencil depthbuffer functions 711 */ 712#ifdef RADEON_R300 713#define WRITE_STENCIL( _x, _y, d ) \ 714do { \ 715 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ 716 GLuint tmp = *_ptr; \ 717 tmp &= 0xffffff00; \ 718 tmp |= (d) & 0xff; \ 719 *_ptr = tmp; \ 720} while (0) 721#elif defined(RADEON_R600) 722#define WRITE_STENCIL( _x, _y, d ) \ 723do { \ 724 GLuint *_ptr = (GLuint*)r600_ptr_stencil(rrb, _x + x_off, _y + y_off); \ 725 GLuint tmp = *_ptr; \ 726 tmp &= 0xffffff00; \ 727 tmp |= (d) & 0xff; \ 728 *_ptr = tmp; \ 729} while (0) 730#elif defined(RADEON_R200) 731#define WRITE_STENCIL( _x, _y, d ) \ 732do { \ 733 GLuint *_ptr = (GLuint*)r200_depth_4byte(rrb, _x + x_off, _y + y_off); \ 734 GLuint tmp = *_ptr; \ 735 tmp &= 0x00ffffff; \ 736 tmp |= (((d) & 0xff) << 24); \ 737 *_ptr = tmp; \ 738} while (0) 739#else 740#define WRITE_STENCIL( _x, _y, d ) \ 741do { \ 742 GLuint *_ptr = (GLuint*)radeon_ptr_4byte(rrb, _x + x_off, _y + y_off); \ 743 GLuint tmp = *_ptr; \ 744 tmp &= 0x00ffffff; \ 745 tmp |= (((d) & 0xff) << 24); \ 746 *_ptr = tmp; \ 747} while (0) 748#endif 749 750#ifdef RADEON_R300 751#define READ_STENCIL( d, _x, _y ) \ 752do { \ 753 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 754 GLuint tmp = *_ptr; \ 755 d = tmp & 0x000000ff; \ 756} while (0) 757#elif defined(RADEON_R600) 758#define READ_STENCIL( d, _x, _y ) \ 759do { \ 760 GLuint *_ptr = (GLuint*)r600_ptr_stencil( rrb, _x + x_off, _y + y_off ); \ 761 GLuint tmp = *_ptr; \ 762 d = tmp & 0x000000ff; \ 763} while (0) 764#elif defined(RADEON_R200) 765#define READ_STENCIL( d, _x, _y ) \ 766do { \ 767 GLuint *_ptr = (GLuint*)r200_depth_4byte( rrb, _x + x_off, _y + y_off ); \ 768 GLuint tmp = *_ptr; \ 769 d = (tmp & 0xff000000) >> 24; \ 770} while (0) 771#else 772#define READ_STENCIL( d, _x, _y ) \ 773do { \ 774 GLuint *_ptr = (GLuint*)radeon_ptr_4byte( rrb, _x + x_off, _y + y_off ); \ 775 GLuint tmp = 
*_ptr; \ 776 d = (tmp & 0xff000000) >> 24; \ 777} while (0) 778#endif 779 780#define TAG(x) radeon##x##_s8_z24 781#include "stenciltmp.h" 782 783 784static void map_unmap_rb(struct gl_renderbuffer *rb, int flag) 785{ 786 struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb); 787 int r; 788 789 if (rrb == NULL || !rrb->bo) 790 return; 791 792 if (flag) { 793 if (rrb->bo->bom->funcs->bo_wait) 794 radeon_bo_wait(rrb->bo); 795 r = radeon_bo_map(rrb->bo, 1); 796 if (r) { 797 fprintf(stderr, "(%s) error(%d) mapping buffer.\n", 798 __FUNCTION__, r); 799 } 800 801 radeonSetSpanFunctions(rrb); 802 } else { 803 radeon_bo_unmap(rrb->bo); 804 rb->GetRow = NULL; 805 rb->PutRow = NULL; 806 } 807} 808 809static void 810radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map) 811{ 812 GLuint i, j; 813 814 /* color draw buffers */ 815 for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++) 816 map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map); 817 818 /* check for render to textures */ 819 for (i = 0; i < BUFFER_COUNT; i++) { 820 struct gl_renderbuffer_attachment *att = 821 ctx->DrawBuffer->Attachment + i; 822 struct gl_texture_object *tex = att->Texture; 823 if (tex) { 824 /* Render to texture. Note that a mipmapped texture need not 825 * be complete for render to texture, so we must restrict to 826 * mapping only the attached image. 827 */ 828 radeon_texture_image *image = get_radeon_texture_image(tex->Image[att->CubeMapFace][att->TextureLevel]); 829 ASSERT(att->Renderbuffer); 830 831 if (map) 832 radeon_teximage_map(image, GL_TRUE); 833 else 834 radeon_teximage_unmap(image); 835 } 836 } 837 838 map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map); 839 840 /* depth buffer (Note wrapper!) 
*/ 841 if (ctx->DrawBuffer->_DepthBuffer) 842 map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map); 843 844 if (ctx->DrawBuffer->_StencilBuffer) 845 map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map); 846} 847 848static void radeonSpanRenderStart(GLcontext * ctx) 849{ 850 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 851 int i; 852 853 radeon_firevertices(rmesa); 854 855 /* The locking and wait for idle should really only be needed in classic mode. 856 * In a future memory manager based implementation, this should become 857 * unnecessary due to the fact that mapping our buffers, textures, etc. 858 * should implicitly wait for any previous rendering commands that must 859 * be waited on. */ 860 if (!rmesa->radeonScreen->driScreen->dri2.enabled) { 861 LOCK_HARDWARE(rmesa); 862 radeonWaitForIdleLocked(rmesa); 863 } 864 865 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 866 if (ctx->Texture.Unit[i]._ReallyEnabled) 867 ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current); 868 } 869 870 radeon_map_unmap_buffers(ctx, 1); 871} 872 873static void radeonSpanRenderFinish(GLcontext * ctx) 874{ 875 radeonContextPtr rmesa = RADEON_CONTEXT(ctx); 876 int i; 877 _swrast_flush(ctx); 878 if (!rmesa->radeonScreen->driScreen->dri2.enabled) { 879 UNLOCK_HARDWARE(rmesa); 880 } 881 for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) { 882 if (ctx->Texture.Unit[i]._ReallyEnabled) 883 ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current); 884 } 885 886 radeon_map_unmap_buffers(ctx, 0); 887} 888 889void radeonInitSpanFuncs(GLcontext * ctx) 890{ 891 struct swrast_device_driver *swdd = 892 _swrast_GetDeviceDriverReference(ctx); 893 swdd->SpanRenderStart = radeonSpanRenderStart; 894 swdd->SpanRenderFinish = radeonSpanRenderFinish; 895} 896 897/** 898 * Plug in the Get/Put routines for the given driRenderbuffer. 
899 */ 900static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb) 901{ 902 if (rrb->base.Format == MESA_FORMAT_RGB565) { 903 radeonInitPointers_RGB565(&rrb->base); 904 } else if (rrb->base.Format == MESA_FORMAT_RGB565_REV) { 905 radeonInitPointers_RGB565_REV(&rrb->base); 906 } else if (rrb->base.Format == MESA_FORMAT_XRGB8888) { 907 radeonInitPointers_xRGB8888(&rrb->base); 908 } else if (rrb->base.Format == MESA_FORMAT_XRGB8888_REV) { 909 radeonInitPointers_BGRx8888(&rrb->base); 910 } else if (rrb->base.Format == MESA_FORMAT_ARGB8888) { 911 radeonInitPointers_ARGB8888(&rrb->base); 912 } else if (rrb->base.Format == MESA_FORMAT_ARGB8888_REV) { 913 radeonInitPointers_BGRA8888(&rrb->base); 914 } else if (rrb->base.Format == MESA_FORMAT_ARGB4444) { 915 radeonInitPointers_ARGB4444(&rrb->base); 916 } else if (rrb->base.Format == MESA_FORMAT_ARGB4444_REV) { 917 radeonInitPointers_ARGB4444_REV(&rrb->base); 918 } else if (rrb->base.Format == MESA_FORMAT_ARGB1555) { 919 radeonInitPointers_ARGB1555(&rrb->base); 920 } else if (rrb->base.Format == MESA_FORMAT_ARGB1555_REV) { 921 radeonInitPointers_ARGB1555_REV(&rrb->base); 922 } else if (rrb->base.Format == MESA_FORMAT_Z16) { 923 radeonInitDepthPointers_z16(&rrb->base); 924 } else if (rrb->base.Format == MESA_FORMAT_X8_Z24) { 925 radeonInitDepthPointers_z24(&rrb->base); 926 } else if (rrb->base.Format == MESA_FORMAT_S8_Z24) { 927 radeonInitDepthPointers_s8_z24(&rrb->base); 928 } else if (rrb->base.Format == MESA_FORMAT_S8) { 929 radeonInitStencilPointers_s8_z24(&rrb->base); 930 } else { 931 fprintf(stderr, "radeonSetSpanFunctions: bad format: 0x%04X\n", rrb->base.Format); 932 } 933} 934