/* intel_blit.c revision da2816a45e6e3a33246a341fee72e6f893f315d9 */
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
25 * 26 **************************************************************************/ 27 28 29#include "main/mtypes.h" 30#include "main/context.h" 31#include "main/enums.h" 32#include "main/colormac.h" 33 34#include "intel_blit.h" 35#include "intel_buffers.h" 36#include "intel_context.h" 37#include "intel_fbo.h" 38#include "intel_reg.h" 39#include "intel_regions.h" 40#include "intel_batchbuffer.h" 41#include "intel_mipmap_tree.h" 42 43#define FILE_DEBUG_FLAG DEBUG_BLIT 44 45static GLuint translate_raster_op(GLenum logicop) 46{ 47 switch(logicop) { 48 case GL_CLEAR: return 0x00; 49 case GL_AND: return 0x88; 50 case GL_AND_REVERSE: return 0x44; 51 case GL_COPY: return 0xCC; 52 case GL_AND_INVERTED: return 0x22; 53 case GL_NOOP: return 0xAA; 54 case GL_XOR: return 0x66; 55 case GL_OR: return 0xEE; 56 case GL_NOR: return 0x11; 57 case GL_EQUIV: return 0x99; 58 case GL_INVERT: return 0x55; 59 case GL_OR_REVERSE: return 0xDD; 60 case GL_COPY_INVERTED: return 0x33; 61 case GL_OR_INVERTED: return 0xBB; 62 case GL_NAND: return 0x77; 63 case GL_SET: return 0xFF; 64 default: return 0; 65 } 66} 67 68static uint32_t 69br13_for_cpp(int cpp) 70{ 71 switch (cpp) { 72 case 4: 73 return BR13_8888; 74 break; 75 case 2: 76 return BR13_565; 77 break; 78 case 1: 79 return BR13_8; 80 break; 81 default: 82 assert(0); 83 return 0; 84 } 85} 86 87/* Copy BitBlt 88 */ 89bool 90intelEmitCopyBlit(struct intel_context *intel, 91 GLuint cpp, 92 GLshort src_pitch, 93 drm_intel_bo *src_buffer, 94 GLuint src_offset, 95 uint32_t src_tiling, 96 GLshort dst_pitch, 97 drm_intel_bo *dst_buffer, 98 GLuint dst_offset, 99 uint32_t dst_tiling, 100 GLshort src_x, GLshort src_y, 101 GLshort dst_x, GLshort dst_y, 102 GLshort w, GLshort h, 103 GLenum logic_op) 104{ 105 GLuint CMD, BR13, pass = 0; 106 int dst_y2 = dst_y + h; 107 int dst_x2 = dst_x + w; 108 drm_intel_bo *aper_array[3]; 109 BATCH_LOCALS; 110 111 if (dst_tiling != I915_TILING_NONE) { 112 if (dst_offset & 4095) 113 return false; 114 if (dst_tiling == 
I915_TILING_Y) 115 return false; 116 } 117 if (src_tiling != I915_TILING_NONE) { 118 if (src_offset & 4095) 119 return false; 120 if (src_tiling == I915_TILING_Y) 121 return false; 122 } 123 124 /* do space check before going any further */ 125 do { 126 aper_array[0] = intel->batch.bo; 127 aper_array[1] = dst_buffer; 128 aper_array[2] = src_buffer; 129 130 if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { 131 intel_batchbuffer_flush(intel); 132 pass++; 133 } else 134 break; 135 } while (pass < 2); 136 137 if (pass >= 2) 138 return false; 139 140 intel_batchbuffer_require_space(intel, 8 * 4, true); 141 DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", 142 __FUNCTION__, 143 src_buffer, src_pitch, src_offset, src_x, src_y, 144 dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); 145 146 src_pitch *= cpp; 147 dst_pitch *= cpp; 148 149 /* Blit pitch must be dword-aligned. Otherwise, the hardware appears to drop 150 * the low bits. 151 */ 152 assert(src_pitch % 4 == 0); 153 assert(dst_pitch % 4 == 0); 154 155 /* For big formats (such as floating point), do the copy using 32bpp and 156 * multiply the coordinates. 
157 */ 158 if (cpp > 4) { 159 assert(cpp % 4 == 0); 160 dst_x *= cpp / 4; 161 dst_x2 *= cpp / 4; 162 src_x *= cpp / 4; 163 cpp = 4; 164 } 165 166 BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; 167 168 switch (cpp) { 169 case 1: 170 case 2: 171 CMD = XY_SRC_COPY_BLT_CMD; 172 break; 173 case 4: 174 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; 175 break; 176 default: 177 return false; 178 } 179 180#ifndef I915 181 if (dst_tiling != I915_TILING_NONE) { 182 CMD |= XY_DST_TILED; 183 dst_pitch /= 4; 184 } 185 if (src_tiling != I915_TILING_NONE) { 186 CMD |= XY_SRC_TILED; 187 src_pitch /= 4; 188 } 189#endif 190 191 if (dst_y2 <= dst_y || dst_x2 <= dst_x) { 192 return true; 193 } 194 195 assert(dst_x < dst_x2); 196 assert(dst_y < dst_y2); 197 198 BEGIN_BATCH_BLT(8); 199 OUT_BATCH(CMD); 200 OUT_BATCH(BR13 | (uint16_t)dst_pitch); 201 OUT_BATCH((dst_y << 16) | dst_x); 202 OUT_BATCH((dst_y2 << 16) | dst_x2); 203 OUT_RELOC_FENCED(dst_buffer, 204 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 205 dst_offset); 206 OUT_BATCH((src_y << 16) | src_x); 207 OUT_BATCH((uint16_t)src_pitch); 208 OUT_RELOC_FENCED(src_buffer, 209 I915_GEM_DOMAIN_RENDER, 0, 210 src_offset); 211 ADVANCE_BATCH(); 212 213 intel_batchbuffer_emit_mi_flush(intel); 214 215 return true; 216} 217 218 219/** 220 * Use blitting to clear the renderbuffers named by 'flags'. 221 * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field 222 * since that might include software renderbuffers or renderbuffers 223 * which we're clearing with triangles. 
224 * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear 225 */ 226GLbitfield 227intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) 228{ 229 struct intel_context *intel = intel_context(ctx); 230 struct gl_framebuffer *fb = ctx->DrawBuffer; 231 GLuint clear_depth_value, clear_depth_mask; 232 GLint cx, cy, cw, ch; 233 GLbitfield fail_mask = 0; 234 BATCH_LOCALS; 235 236 /* 237 * Compute values for clearing the buffers. 238 */ 239 clear_depth_value = 0; 240 clear_depth_mask = 0; 241 if (mask & BUFFER_BIT_DEPTH) { 242 clear_depth_value = (GLuint) (fb->_DepthMax * ctx->Depth.Clear); 243 clear_depth_mask = XY_BLT_WRITE_RGB; 244 } 245 if (mask & BUFFER_BIT_STENCIL) { 246 clear_depth_value |= (ctx->Stencil.Clear & 0xff) << 24; 247 clear_depth_mask |= XY_BLT_WRITE_ALPHA; 248 } 249 250 cx = fb->_Xmin; 251 if (fb->Name == 0) 252 cy = ctx->DrawBuffer->Height - fb->_Ymax; 253 else 254 cy = fb->_Ymin; 255 cw = fb->_Xmax - fb->_Xmin; 256 ch = fb->_Ymax - fb->_Ymin; 257 258 if (cw == 0 || ch == 0) 259 return 0; 260 261 /* Loop over all renderbuffers */ 262 mask &= (1 << BUFFER_COUNT) - 1; 263 while (mask) { 264 GLuint buf = _mesa_ffs(mask) - 1; 265 bool is_depth_stencil = buf == BUFFER_DEPTH || buf == BUFFER_STENCIL; 266 struct intel_renderbuffer *irb; 267 int x1, y1, x2, y2; 268 uint32_t clear_val; 269 uint32_t BR13, CMD; 270 struct intel_region *region; 271 int pitch, cpp; 272 drm_intel_bo *aper_array[2]; 273 274 mask &= ~(1 << buf); 275 276 irb = intel_get_renderbuffer(fb, buf); 277 if (irb && irb->mt) { 278 region = irb->mt->region; 279 assert(region); 280 assert(region->bo); 281 } else { 282 fail_mask |= 1 << buf; 283 continue; 284 } 285 286 /* OK, clear this renderbuffer */ 287 x1 = cx + irb->draw_x; 288 y1 = cy + irb->draw_y; 289 x2 = cx + cw + irb->draw_x; 290 y2 = cy + ch + irb->draw_y; 291 292 pitch = region->pitch; 293 cpp = region->cpp; 294 295 DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", 296 __FUNCTION__, 297 region->bo, (pitch * cpp), 298 x1, 
y1, x2 - x1, y2 - y1); 299 300 BR13 = 0xf0 << 16; 301 CMD = XY_COLOR_BLT_CMD; 302 303 /* Setup the blit command */ 304 if (cpp == 4) { 305 if (is_depth_stencil) { 306 CMD |= clear_depth_mask; 307 } else { 308 /* clearing RGBA */ 309 CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; 310 } 311 } 312 313 assert(region->tiling != I915_TILING_Y); 314 315#ifndef I915 316 if (region->tiling != I915_TILING_NONE) { 317 CMD |= XY_DST_TILED; 318 pitch /= 4; 319 } 320#endif 321 BR13 |= (pitch * cpp); 322 323 if (is_depth_stencil) { 324 clear_val = clear_depth_value; 325 } else { 326 uint8_t clear[4]; 327 GLfloat *color = ctx->Color.ClearColor.f; 328 329 _mesa_unclamped_float_rgba_to_ubyte(clear, color); 330 331 switch (irb->Base.Format) { 332 case MESA_FORMAT_ARGB8888: 333 case MESA_FORMAT_XRGB8888: 334 clear_val = PACK_COLOR_8888(clear[3], clear[0], 335 clear[1], clear[2]); 336 break; 337 case MESA_FORMAT_RGB565: 338 clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]); 339 break; 340 case MESA_FORMAT_ARGB4444: 341 clear_val = PACK_COLOR_4444(clear[3], clear[0], 342 clear[1], clear[2]); 343 break; 344 case MESA_FORMAT_ARGB1555: 345 clear_val = PACK_COLOR_1555(clear[3], clear[0], 346 clear[1], clear[2]); 347 break; 348 case MESA_FORMAT_A8: 349 clear_val = PACK_COLOR_8888(clear[3], clear[3], 350 clear[3], clear[3]); 351 break; 352 default: 353 fail_mask |= 1 << buf; 354 continue; 355 } 356 } 357 358 BR13 |= br13_for_cpp(cpp); 359 360 assert(x1 < x2); 361 assert(y1 < y2); 362 363 /* do space check before going any further */ 364 aper_array[0] = intel->batch.bo; 365 aper_array[1] = region->bo; 366 367 if (drm_intel_bufmgr_check_aperture_space(aper_array, 368 ARRAY_SIZE(aper_array)) != 0) { 369 intel_batchbuffer_flush(intel); 370 } 371 372 BEGIN_BATCH_BLT(6); 373 OUT_BATCH(CMD); 374 OUT_BATCH(BR13); 375 OUT_BATCH((y1 << 16) | x1); 376 OUT_BATCH((y2 << 16) | x2); 377 OUT_RELOC_FENCED(region->bo, 378 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 379 0); 380 OUT_BATCH(clear_val); 
381 ADVANCE_BATCH(); 382 383 if (intel->always_flush_cache) 384 intel_batchbuffer_emit_mi_flush(intel); 385 386 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) 387 mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); 388 } 389 390 return fail_mask; 391} 392 393bool 394intelEmitImmediateColorExpandBlit(struct intel_context *intel, 395 GLuint cpp, 396 GLubyte *src_bits, GLuint src_size, 397 GLuint fg_color, 398 GLshort dst_pitch, 399 drm_intel_bo *dst_buffer, 400 GLuint dst_offset, 401 uint32_t dst_tiling, 402 GLshort x, GLshort y, 403 GLshort w, GLshort h, 404 GLenum logic_op) 405{ 406 int dwords = ALIGN(src_size, 8) / 4; 407 uint32_t opcode, br13, blit_cmd; 408 409 if (dst_tiling != I915_TILING_NONE) { 410 if (dst_offset & 4095) 411 return false; 412 if (dst_tiling == I915_TILING_Y) 413 return false; 414 } 415 416 assert( logic_op - GL_CLEAR >= 0 ); 417 assert( logic_op - GL_CLEAR < 0x10 ); 418 assert(dst_pitch > 0); 419 420 if (w < 0 || h < 0) 421 return true; 422 423 dst_pitch *= cpp; 424 425 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", 426 __FUNCTION__, 427 dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); 428 429 intel_batchbuffer_require_space(intel, 430 (8 * 4) + 431 (3 * 4) + 432 dwords * 4, true); 433 434 opcode = XY_SETUP_BLT_CMD; 435 if (cpp == 4) 436 opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; 437#ifndef I915 438 if (dst_tiling != I915_TILING_NONE) { 439 opcode |= XY_DST_TILED; 440 dst_pitch /= 4; 441 } 442#endif 443 444 br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29); 445 br13 |= br13_for_cpp(cpp); 446 447 blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? 
*/ 448 if (dst_tiling != I915_TILING_NONE) 449 blit_cmd |= XY_DST_TILED; 450 451 BEGIN_BATCH_BLT(8 + 3); 452 OUT_BATCH(opcode); 453 OUT_BATCH(br13); 454 OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ 455 OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ 456 OUT_RELOC_FENCED(dst_buffer, 457 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 458 dst_offset); 459 OUT_BATCH(0); /* bg */ 460 OUT_BATCH(fg_color); /* fg */ 461 OUT_BATCH(0); /* pattern base addr */ 462 463 OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); 464 OUT_BATCH((y << 16) | x); 465 OUT_BATCH(((y + h) << 16) | (x + w)); 466 ADVANCE_BATCH(); 467 468 intel_batchbuffer_data(intel, src_bits, dwords * 4, true); 469 470 intel_batchbuffer_emit_mi_flush(intel); 471 472 return true; 473} 474 475/* We don't have a memmove-type blit like some other hardware, so we'll do a 476 * rectangular blit covering a large space, then emit 1-scanline blit at the 477 * end to cover the last if we need. 478 */ 479void 480intel_emit_linear_blit(struct intel_context *intel, 481 drm_intel_bo *dst_bo, 482 unsigned int dst_offset, 483 drm_intel_bo *src_bo, 484 unsigned int src_offset, 485 unsigned int size) 486{ 487 GLuint pitch, height; 488 bool ok; 489 490 /* The pitch given to the GPU must be DWORD aligned, and 491 * we want width to match pitch. 
Max width is (1 << 15 - 1), 492 * rounding that down to the nearest DWORD is 1 << 15 - 4 493 */ 494 pitch = MIN2(size, (1 << 15) - 4); 495 height = size / pitch; 496 ok = intelEmitCopyBlit(intel, 1, 497 pitch, src_bo, src_offset, I915_TILING_NONE, 498 pitch, dst_bo, dst_offset, I915_TILING_NONE, 499 0, 0, /* src x/y */ 500 0, 0, /* dst x/y */ 501 pitch, height, /* w, h */ 502 GL_COPY); 503 assert(ok); 504 505 src_offset += pitch * height; 506 dst_offset += pitch * height; 507 size -= pitch * height; 508 assert (size < (1 << 15)); 509 assert ((size & 3) == 0); /* Pitch must be DWORD aligned */ 510 if (size != 0) { 511 ok = intelEmitCopyBlit(intel, 1, 512 size, src_bo, src_offset, I915_TILING_NONE, 513 size, dst_bo, dst_offset, I915_TILING_NONE, 514 0, 0, /* src x/y */ 515 0, 0, /* dst x/y */ 516 size, 1, /* w, h */ 517 GL_COPY); 518 assert(ok); 519 } 520} 521 522/** 523 * Used to initialize the alpha value of an ARGB8888 teximage after 524 * loading it from an XRGB8888 source. 525 * 526 * This is very common with glCopyTexImage2D(). 
527 */ 528void 529intel_set_teximage_alpha_to_one(struct gl_context *ctx, 530 struct intel_texture_image *intel_image) 531{ 532 struct intel_context *intel = intel_context(ctx); 533 unsigned int image_x, image_y; 534 uint32_t x1, y1, x2, y2; 535 uint32_t BR13, CMD; 536 int pitch, cpp; 537 drm_intel_bo *aper_array[2]; 538 struct intel_region *region = intel_image->mt->region; 539 int width, height, depth; 540 BATCH_LOCALS; 541 542 intel_miptree_get_dimensions_for_image(&intel_image->base.Base, 543 &width, &height, &depth); 544 assert(depth == 1); 545 546 assert(intel_image->base.Base.TexFormat == MESA_FORMAT_ARGB8888); 547 548 /* get dest x/y in destination texture */ 549 intel_miptree_get_image_offset(intel_image->mt, 550 intel_image->base.Base.Level, 551 intel_image->base.Base.Face, 552 0, 553 &image_x, &image_y); 554 555 x1 = image_x; 556 y1 = image_y; 557 x2 = image_x + width; 558 y2 = image_y + height; 559 560 pitch = region->pitch; 561 cpp = region->cpp; 562 563 DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", 564 __FUNCTION__, 565 intel_image->mt->region->bo, (pitch * cpp), 566 x1, y1, x2 - x1, y2 - y1); 567 568 BR13 = br13_for_cpp(cpp) | 0xf0 << 16; 569 CMD = XY_COLOR_BLT_CMD; 570 CMD |= XY_BLT_WRITE_ALPHA; 571 572 assert(region->tiling != I915_TILING_Y); 573 574#ifndef I915 575 if (region->tiling != I915_TILING_NONE) { 576 CMD |= XY_DST_TILED; 577 pitch /= 4; 578 } 579#endif 580 BR13 |= (pitch * cpp); 581 582 /* do space check before going any further */ 583 aper_array[0] = intel->batch.bo; 584 aper_array[1] = region->bo; 585 586 if (drm_intel_bufmgr_check_aperture_space(aper_array, 587 ARRAY_SIZE(aper_array)) != 0) { 588 intel_batchbuffer_flush(intel); 589 } 590 591 BEGIN_BATCH_BLT(6); 592 OUT_BATCH(CMD); 593 OUT_BATCH(BR13); 594 OUT_BATCH((y1 << 16) | x1); 595 OUT_BATCH((y2 << 16) | x2); 596 OUT_RELOC_FENCED(region->bo, 597 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 598 0); 599 OUT_BATCH(0xffffffff); /* white, but only alpha gets written */ 600 
ADVANCE_BATCH(); 601 602 intel_batchbuffer_emit_mi_flush(intel); 603} 604