radeon_span.c revision f577c8e462fc924ea436d129ad64c8a1226b5f9c
/**************************************************************************

Copyright (C) The Weather Channel, Inc.  2002.  All Rights Reserved.
Copyright 2000, 2001 ATI Technologies Inc., Ontario, Canada, and
                     VA Linux Systems Inc., Fremont, California.

The Weather Channel (TM) funded Tungsten Graphics to develop the
initial release of the Radeon 8500 driver under the XFree86 license.
This notice must be preserved.

All Rights Reserved.

Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:

The above copyright notice and this permission notice (including the
next paragraph) shall be included in all copies or substantial
portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

**************************************************************************/

/*
 * Authors:
 *   Kevin E. Martin <martin@valinux.com>
 *   Gareth Hughes <gareth@valinux.com>
 *   Keith Whitwell <keith@tungstengraphics.com>
 *
 */

#include "main/glheader.h"
#include "swrast/swrast.h"

#include "radeon_common.h"
#include "radeon_lock.h"
#include "radeon_span.h"

#define DBG 0

static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb);
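
/* The radeon_ptr*() helpers below turn an (x, y) pixel coordinate into a
 * CPU-visible byte address inside the mapped buffer object.  When the
 * buffer has a surface register set up (has_surface) or is not tiled at
 * all, the address is the usual linear x * cpp + y * pitch.  Otherwise
 * the helpers replicate the hardware swizzle in software: the offset is
 * assembled from the macro-tile row and column (2 kB macro tiles, hence
 * the << 11 terms), the 32-byte micro tile within the macro tile (the
 * << 5 terms), and finally the pixel within the micro tile.
 *
 * As an illustrative sketch (not part of the driver), the linear case
 * for a 32 bpp surface reduces to:
 *
 *	GLubyte *pixel = rrb->bo->ptr + y * rrb->pitch + x * 4;
 *	*(uint32_t *)pixel = 0x00ff0000;	(opaque red in xRGB8888)
 *
 * whereas the tiled cases interleave the low bits of x and y into the
 * offset instead of multiplying them out directly.
 */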
static GLubyte *radeon_ptr32(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				nmacroblkpl = rrb->pitch >> 5;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x >> 5) << 11;
				offset += ((x & 31) >> 2) << 5;
				offset += (x & 3) << 2;
			} else {
				nmacroblkpl = rrb->pitch >> 6;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x >> 6) << 11;
				offset += ((x & 63) >> 3) << 5;
				offset += (x & 7) << 2;
			}
		} else {
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x >> 3) << 5;
			offset += (x & 7) << 2;
		}
	}
	return &ptr[offset];
}

static GLubyte *radeon_ptr16(const struct radeon_renderbuffer * rrb,
			     GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				nmacroblkpl = rrb->pitch >> 6;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x >> 6) << 11;
				offset += ((x & 63) >> 3) << 5;
				offset += (x & 7) << 1;
			} else {
				nmacroblkpl = rrb->pitch >> 7;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x >> 7) << 11;
				offset += ((x & 127) >> 4) << 5;
				offset += (x & 15) << 2;
			}
		} else {
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x >> 4) << 5;
			offset += (x & 15) << 2;
		}
	}
	return &ptr[offset];
}

static GLubyte *radeon_ptr(const struct radeon_renderbuffer * rrb,
			   GLint x, GLint y)
{
	GLubyte *ptr = rrb->bo->ptr;
	uint32_t mask = RADEON_BO_FLAGS_MACRO_TILE | RADEON_BO_FLAGS_MICRO_TILE;
	GLint offset;
	GLint microblkxs;
	GLint macroblkxs;
	GLint nmacroblkpl;
	GLint nmicroblkpl;

	if (rrb->has_surface || !(rrb->bo->flags & mask)) {
		offset = x * rrb->cpp + y * rrb->pitch;
	} else {
		offset = 0;
		if (rrb->bo->flags & RADEON_BO_FLAGS_MACRO_TILE) {
			if (rrb->bo->flags & RADEON_BO_FLAGS_MICRO_TILE) {
				microblkxs = 16 / rrb->cpp;
				macroblkxs = 128 / rrb->cpp;
				nmacroblkpl = rrb->pitch / macroblkxs;
				offset += ((y >> 4) * nmacroblkpl) << 11;
				offset += ((y & 15) >> 1) << 8;
				offset += (y & 1) << 4;
				offset += (x / macroblkxs) << 11;
				offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
				offset += (x & (microblkxs - 1)) * rrb->cpp;
			} else {
				microblkxs = 32 / rrb->cpp;
				macroblkxs = 256 / rrb->cpp;
				nmacroblkpl = rrb->pitch / macroblkxs;
				offset += ((y >> 3) * nmacroblkpl) << 11;
				offset += (y & 7) << 8;
				offset += (x / macroblkxs) << 11;
				offset += ((x & (macroblkxs - 1)) / microblkxs) << 5;
				offset += (x & (microblkxs - 1)) * rrb->cpp;
			}
		} else {
			microblkxs = 32 / rrb->cpp;
			nmicroblkpl = ((rrb->pitch + 31) & ~31) >> 5;
			offset += (y * nmicroblkpl) << 5;
			offset += (x / microblkxs) << 5;
			offset += (x & (microblkxs - 1)) * rrb->cpp;
		}
	}
	return &ptr[offset];
}


/*
 * Note that all information needed to access pixels in a renderbuffer
 * should be obtained through the gl_renderbuffer parameter, not per-context
 * information.
 */
#define LOCAL_VARS							\
	struct radeon_context *radeon = RADEON_CONTEXT(ctx);		\
	struct radeon_renderbuffer *rrb = (void *) rb;			\
	const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;		\
	const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
	unsigned int num_cliprects;					\
	struct drm_clip_rect *cliprects;				\
	int x_off, y_off;						\
	GLuint p;							\
	(void)p;							\
	radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);

#define LOCAL_DEPTH_VARS						\
	struct radeon_context *radeon = RADEON_CONTEXT(ctx);		\
	struct radeon_renderbuffer *rrb = (void *) rb;			\
	const GLint yScale = ctx->DrawBuffer->Name ? 1 : -1;		\
	const GLint yBias = ctx->DrawBuffer->Name ? 0 : rrb->base.Height - 1;\
	unsigned int num_cliprects;					\
	struct drm_clip_rect *cliprects;				\
	int x_off, y_off;						\
	radeon_get_cliprects(radeon, &cliprects, &num_cliprects, &x_off, &y_off);
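
/* yScale and yBias (set up by the LOCAL_*_VARS macros above) feed the
 * Y_FLIP() macro below.  The window-system framebuffer is stored
 * top-to-bottom while OpenGL addresses it bottom-to-top, so for the
 * winsys drawable (DrawBuffer->Name == 0) spans are flipped:
 * yScale = -1 and yBias = Height - 1, giving Y_FLIP(0) == Height - 1.
 * Renderbuffers backing user FBOs (Name != 0) already use GL's
 * bottom-up orientation, so they pass through unchanged
 * (yScale = 1, yBias = 0).
 */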
#define LOCAL_STENCIL_VARS LOCAL_DEPTH_VARS

#define Y_FLIP(_y) ((_y) * yScale + yBias)

#define HW_LOCK()

#define HW_UNLOCK()

/* XXX FBO: this is identical to the macro in spantmp2.h except we get
 * the cliprect info from the context, not the driDrawable.
 * Move this into spantmp2.h someday.
 */
#define HW_CLIPLOOP()						\
	do {							\
		int _nc = num_cliprects;			\
		while ( _nc-- ) {				\
			int minx = cliprects[_nc].x1 - x_off;	\
			int miny = cliprects[_nc].y1 - y_off;	\
			int maxx = cliprects[_nc].x2 - x_off;	\
			int maxy = cliprects[_nc].y2 - y_off;

/* ================================================================
 * Color buffer
 */

/* 16 bit, RGB565 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_RGB
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_SHORT_5_6_5

#define TAG(x)    radeon##x##_RGB565
#define TAG2(x,y) radeon##x##_RGB565##y
#define GET_PTR(X,Y) radeon_ptr16(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* 32 bit, xRGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_xRGB8888
#define TAG2(x,y) radeon##x##_xRGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* 32 bit, ARGB8888 color spanline and pixel functions
 */
#define SPANTMP_PIXEL_FMT GL_BGRA
#define SPANTMP_PIXEL_TYPE GL_UNSIGNED_INT_8_8_8_8_REV

#define TAG(x)    radeon##x##_ARGB8888
#define TAG2(x,y) radeon##x##_ARGB8888##y
#define GET_PTR(X,Y) radeon_ptr32(rrb, (X) + x_off, (Y) + y_off)
#include "spantmp2.h"

/* ================================================================
 * Depth buffer
 */

/* The Radeon family has depth tiling on all the time, so we have to convert
 * the x,y coordinates into the memory bus address (mba) in the same
 * manner as the engine.  In each case, the linear block address (ba)
 * is calculated, and then wired with x and y to produce the final
 * memory address.
 * The chip will do address translation on its own if the surface registers
 * are set up correctly.  It is not quite enough to get it working with hyperz
 * too...
 */

/* 16-bit depth buffer functions
 */
#define VALUE_TYPE GLushort

#define WRITE_DEPTH( _x, _y, d )				\
	*(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off) = d

#define READ_DEPTH( d, _x, _y )					\
	d = *(GLushort *)radeon_ptr(rrb, _x + x_off, _y + y_off)

#define TAG(x) radeon##x##_z16
#include "depthtmp.h"

/* 24 bit depth, 8 bit stencil depthbuffer functions
 *
 * Careful: It looks like the R300 uses ZZZS byte order while the R200
 * uses SZZZ for 24 bit depth, 8 bit stencil mode.
 */
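
/* Concretely, judging from the masks below: on R300 the 32-bit word
 * holds depth in bits 31:8 and stencil in bits 7:0, so writing a depth
 * value d is a read-modify-write that preserves the low stencil byte:
 *
 *	tmp = (tmp & 0x000000ff) | ((d << 8) & 0xffffff00);
 *
 * e.g. d = 0x123456 over an old word 0xaabbccdd yields 0x123456dd.  On
 * R200 the word holds stencil in bits 31:24 and depth in bits 23:0, so
 * the high byte is preserved instead:
 *
 *	tmp = (tmp & 0xff000000) | (d & 0x00ffffff);
 *
 * yielding 0xaa123456 for the same inputs.
 */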
#define VALUE_TYPE GLuint

#ifdef COMPILE_R300
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
	GLuint tmp = *_ptr;						\
	tmp &= 0x000000ff;						\
	tmp |= ((d << 8) & 0xffffff00);					\
	*_ptr = tmp;							\
} while (0)
#else
#define WRITE_DEPTH( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
	GLuint tmp = *_ptr;						\
	tmp &= 0xff000000;						\
	tmp |= ((d) & 0x00ffffff);					\
	*_ptr = tmp;							\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_DEPTH( d, _x, _y )						\
	do {								\
		d = (*(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off)) & 0xffffff00) >> 8; \
	}while(0)
#else
#define READ_DEPTH( d, _x, _y )						\
	d = *(GLuint*)(radeon_ptr32(rrb, _x + x_off, _y + y_off )) & 0x00ffffff;
#endif
/*
    fprintf(stderr, "dval(%d, %d, %d, %d)=0x%08X\n", _x, xo, _y, yo, d);\
    d = *(GLuint*)(radeon_ptr(rrb, _x, _y )) & 0x00ffffff;
*/
#define TAG(x) radeon##x##_z24_s8
#include "depthtmp.h"

/* ================================================================
 * Stencil buffer
 */

/* 24 bit depth, 8 bit stencil depthbuffer functions
 */
#ifdef COMPILE_R300
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \
	GLuint tmp = *_ptr;						\
	tmp &= 0xffffff00;						\
	tmp |= (d) & 0xff;						\
	*_ptr = tmp;							\
} while (0)
#else
#define WRITE_STENCIL( _x, _y, d )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32(rrb, _x + x_off, _y + y_off); \
	GLuint tmp = *_ptr;						\
	tmp &= 0x00ffffff;						\
	tmp |= (((d) & 0xff) << 24);					\
	*_ptr = tmp;							\
} while (0)
#endif

#ifdef COMPILE_R300
#define READ_STENCIL( d, _x, _y )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
	GLuint tmp = *_ptr;						\
	d = tmp & 0x000000ff;						\
} while (0)
#else
#define READ_STENCIL( d, _x, _y )					\
do {									\
	GLuint *_ptr = (GLuint*)radeon_ptr32( rrb, _x + x_off, _y + y_off ); \
	GLuint tmp = *_ptr;						\
	d = (tmp & 0xff000000) >> 24;					\
} while (0)
#endif

#define TAG(x) radeon##x##_z24_s8
#include "stenciltmp.h"

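
/* Software spans access pixels with the CPU, so every buffer involved
 * (color draw/read buffers, depth/stencil, and any texture being
 * rendered to) must be mapped before swrast touches it and unmapped
 * afterwards.  map_unmap_rb() below handles one renderbuffer: on map it
 * also (re)installs the span functions, and on unmap it clears
 * GetRow/PutRow so a stray access to an unmapped renderbuffer trips on
 * a NULL pointer rather than reading through a stale mapping.
 */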
static void map_unmap_rb(struct gl_renderbuffer *rb, int flag)
{
	struct radeon_renderbuffer *rrb = radeon_renderbuffer(rb);
	int r;

	if (rrb == NULL || !rrb->bo)
		return;

	if (flag) {
		r = radeon_bo_map(rrb->bo, 1);
		if (r) {
			fprintf(stderr, "(%s) error(%d) mapping buffer.\n",
				__FUNCTION__, r);
		}

		radeonSetSpanFunctions(rrb);
	} else {
		radeon_bo_unmap(rrb->bo);
		rb->GetRow = NULL;
		rb->PutRow = NULL;
	}
}

static void
radeon_map_unmap_buffers(GLcontext *ctx, GLboolean map)
{
	GLuint i, j;

	/* color draw buffers */
	for (j = 0; j < ctx->DrawBuffer->_NumColorDrawBuffers; j++)
		map_unmap_rb(ctx->DrawBuffer->_ColorDrawBuffers[j], map);

	/* check for render to textures */
	for (i = 0; i < BUFFER_COUNT; i++) {
		struct gl_renderbuffer_attachment *att =
			ctx->DrawBuffer->Attachment + i;
		struct gl_texture_object *tex = att->Texture;
		if (tex) {
			/* render to texture */
			ASSERT(att->Renderbuffer);
			if (map)
				ctx->Driver.MapTexture(ctx, tex);
			else
				ctx->Driver.UnmapTexture(ctx, tex);
		}
	}

	map_unmap_rb(ctx->ReadBuffer->_ColorReadBuffer, map);

	/* depth buffer (Note wrapper!) */
	if (ctx->DrawBuffer->_DepthBuffer)
		map_unmap_rb(ctx->DrawBuffer->_DepthBuffer->Wrapped, map);

	if (ctx->DrawBuffer->_StencilBuffer)
		map_unmap_rb(ctx->DrawBuffer->_StencilBuffer->Wrapped, map);
}

static void radeonSpanRenderStart(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int i;

	radeon_firevertices(rmesa);

	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
		if (ctx->Texture.Unit[i]._ReallyEnabled)
			ctx->Driver.MapTexture(ctx, ctx->Texture.Unit[i]._Current);
	}

	radeon_map_unmap_buffers(ctx, 1);

	/* The locking and wait for idle should really only be needed in classic mode.
	 * In a future memory manager based implementation, this should become
	 * unnecessary due to the fact that mapping our buffers, textures, etc.
	 * should implicitly wait for any previous rendering commands that must
	 * be waited on. */
	LOCK_HARDWARE(rmesa);
	radeonWaitForIdleLocked(rmesa);
}

static void radeonSpanRenderFinish(GLcontext * ctx)
{
	radeonContextPtr rmesa = RADEON_CONTEXT(ctx);
	int i;

	_swrast_flush(ctx);

	UNLOCK_HARDWARE(rmesa);

	for (i = 0; i < ctx->Const.MaxTextureImageUnits; i++) {
		if (ctx->Texture.Unit[i]._ReallyEnabled)
			ctx->Driver.UnmapTexture(ctx, ctx->Texture.Unit[i]._Current);
	}

	radeon_map_unmap_buffers(ctx, 0);
}

void radeonInitSpanFuncs(GLcontext * ctx)
{
	struct swrast_device_driver *swdd =
		_swrast_GetDeviceDriverReference(ctx);
	swdd->SpanRenderStart = radeonSpanRenderStart;
	swdd->SpanRenderFinish = radeonSpanRenderFinish;
}

/**
 * Plug in the Get/Put routines for the given driRenderbuffer.
 */
static void radeonSetSpanFunctions(struct radeon_renderbuffer *rrb)
{
	if (rrb->base._ActualFormat == GL_RGB5) {
		radeonInitPointers_RGB565(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_RGB8) {
		radeonInitPointers_xRGB8888(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_RGBA8) {
		radeonInitPointers_ARGB8888(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT16) {
		radeonInitDepthPointers_z16(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_DEPTH_COMPONENT24) {
		radeonInitDepthPointers_z24_s8(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_DEPTH24_STENCIL8_EXT) {
		radeonInitStencilPointers_z24_s8(&rrb->base);
	} else if (rrb->base._ActualFormat == GL_STENCIL_INDEX8_EXT) {
		radeonInitStencilPointers_z24_s8(&rrb->base);
	}
}
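
/* Putting it together: for a software fallback, swrast first calls
 * radeonSpanRenderStart() (flush pending vertices, map textures and
 * buffers, lock and idle the hardware), then reads and writes spans
 * through the GetRow()/PutRow() hooks installed by
 * radeonSetSpanFunctions(), and finally calls radeonSpanRenderFinish()
 * to flush, unlock and unmap everything again.
 */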