intel_blit.c revision efb0417040f4bdd55cf07dd8f54ecd4fc92c1286
1/************************************************************************** 2 * 3 * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28 29#include "main/mtypes.h" 30#include "main/context.h" 31#include "main/enums.h" 32#include "main/colormac.h" 33 34#include "intel_blit.h" 35#include "intel_buffers.h" 36#include "intel_context.h" 37#include "intel_fbo.h" 38#include "intel_reg.h" 39#include "intel_regions.h" 40#include "intel_batchbuffer.h" 41 42#define FILE_DEBUG_FLAG DEBUG_BLIT 43 44static GLuint translate_raster_op(GLenum logicop) 45{ 46 switch(logicop) { 47 case GL_CLEAR: return 0x00; 48 case GL_AND: return 0x88; 49 case GL_AND_REVERSE: return 0x44; 50 case GL_COPY: return 0xCC; 51 case GL_AND_INVERTED: return 0x22; 52 case GL_NOOP: return 0xAA; 53 case GL_XOR: return 0x66; 54 case GL_OR: return 0xEE; 55 case GL_NOR: return 0x11; 56 case GL_EQUIV: return 0x99; 57 case GL_INVERT: return 0x55; 58 case GL_OR_REVERSE: return 0xDD; 59 case GL_COPY_INVERTED: return 0x33; 60 case GL_OR_INVERTED: return 0xBB; 61 case GL_NAND: return 0x77; 62 case GL_SET: return 0xFF; 63 default: return 0; 64 } 65} 66 67static uint32_t 68br13_for_cpp(int cpp) 69{ 70 switch (cpp) { 71 case 4: 72 return BR13_8888; 73 break; 74 case 2: 75 return BR13_565; 76 break; 77 case 1: 78 return BR13_8; 79 break; 80 default: 81 assert(0); 82 return 0; 83 } 84} 85 86/* Copy BitBlt 87 */ 88GLboolean 89intelEmitCopyBlit(struct intel_context *intel, 90 GLuint cpp, 91 GLshort src_pitch, 92 drm_intel_bo *src_buffer, 93 GLuint src_offset, 94 uint32_t src_tiling, 95 GLshort dst_pitch, 96 drm_intel_bo *dst_buffer, 97 GLuint dst_offset, 98 uint32_t dst_tiling, 99 GLshort src_x, GLshort src_y, 100 GLshort dst_x, GLshort dst_y, 101 GLshort w, GLshort h, 102 GLenum logic_op) 103{ 104 GLuint CMD, BR13, pass = 0; 105 int dst_y2 = dst_y + h; 106 int dst_x2 = dst_x + w; 107 drm_intel_bo *aper_array[3]; 108 BATCH_LOCALS; 109 110 /* Blits are in a different ringbuffer so we don't use them. */ 111 if (intel->gen >= 6) 112 return GL_FALSE; 113 114 if (dst_tiling != I915_TILING_NONE) { 115 if (dst_offset & 4095) 116 return GL_FALSE; 117 if (dst_tiling == I915_TILING_Y) 118 return GL_FALSE; 119 } 120 if (src_tiling != I915_TILING_NONE) { 121 if (src_offset & 4095) 122 return GL_FALSE; 123 if (src_tiling == I915_TILING_Y) 124 return GL_FALSE; 125 } 126 127 /* do space check before going any further */ 128 do { 129 aper_array[0] = intel->batch->buf; 130 aper_array[1] = dst_buffer; 131 aper_array[2] = src_buffer; 132 133 if (dri_bufmgr_check_aperture_space(aper_array, 3) != 0) { 134 intel_batchbuffer_flush(intel->batch); 135 pass++; 136 } else 137 break; 138 } while (pass < 2); 139 140 if (pass >= 2) 141 return GL_FALSE; 142 143 intel_batchbuffer_require_space(intel->batch, 8 * 4); 144 DBG("%s src:buf(%p)/%d+%d %d,%d dst:buf(%p)/%d+%d %d,%d sz:%dx%d\n", 145 __FUNCTION__, 146 src_buffer, src_pitch, src_offset, src_x, src_y, 147 dst_buffer, dst_pitch, dst_offset, dst_x, dst_y, w, h); 148 149 src_pitch *= cpp; 150 dst_pitch *= cpp; 151 152 BR13 = br13_for_cpp(cpp) | translate_raster_op(logic_op) << 16; 153 154 switch (cpp) { 155 case 1: 156 case 2: 157 CMD = XY_SRC_COPY_BLT_CMD; 158 break; 159 case 4: 160 CMD = XY_SRC_COPY_BLT_CMD | XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; 161 break; 162 default: 163 return GL_FALSE; 164 } 165 166#ifndef I915 167 if (dst_tiling != I915_TILING_NONE) { 168 CMD |= XY_DST_TILED; 169 dst_pitch /= 4; 170 } 171 if (src_tiling != I915_TILING_NONE) { 172 CMD |= XY_SRC_TILED; 173 src_pitch /= 4; 174 } 175#endif 176 177 if (dst_y2 <= dst_y || dst_x2 <= dst_x) { 178 return GL_TRUE; 179 } 180 181 assert(dst_x < dst_x2); 182 assert(dst_y < dst_y2); 183 184 BEGIN_BATCH(8); 185 OUT_BATCH(CMD); 186 OUT_BATCH(BR13 | (uint16_t)dst_pitch); 187 OUT_BATCH((dst_y << 16) | dst_x); 188 OUT_BATCH((dst_y2 << 16) | dst_x2); 189 OUT_RELOC_FENCED(dst_buffer, 190 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 191 dst_offset); 192 OUT_BATCH((src_y << 16) | src_x); 193 OUT_BATCH((uint16_t)src_pitch); 194 OUT_RELOC_FENCED(src_buffer, 195 I915_GEM_DOMAIN_RENDER, 0, 196 src_offset); 197 ADVANCE_BATCH(); 198 199 intel_batchbuffer_emit_mi_flush(intel->batch); 200 201 return GL_TRUE; 202} 203 204 205/** 206 * Use blitting to clear the renderbuffers named by 'flags'. 207 * Note: we can't use the ctx->DrawBuffer->_ColorDrawBufferIndexes field 208 * since that might include software renderbuffers or renderbuffers 209 * which we're clearing with triangles. 210 * \param mask bitmask of BUFFER_BIT_* values indicating buffers to clear 211 */ 212void 213intelClearWithBlit(struct gl_context *ctx, GLbitfield mask) 214{ 215 struct intel_context *intel = intel_context(ctx); 216 struct gl_framebuffer *fb = ctx->DrawBuffer; 217 GLuint clear_depth; 218 GLboolean all; 219 GLint cx, cy, cw, ch; 220 BATCH_LOCALS; 221 222 /* Blits are in a different ringbuffer so we don't use them. */ 223 assert(intel->gen < 6); 224 225 /* 226 * Compute values for clearing the buffers. 227 */ 228 clear_depth = 0; 229 if (mask & BUFFER_BIT_DEPTH) { 230 clear_depth = (GLuint) (fb->_DepthMax * ctx->Depth.Clear); 231 } 232 if (mask & BUFFER_BIT_STENCIL) { 233 clear_depth |= (ctx->Stencil.Clear & 0xff) << 24; 234 } 235 236 cx = fb->_Xmin; 237 if (fb->Name == 0) 238 cy = ctx->DrawBuffer->Height - fb->_Ymax; 239 else 240 cy = fb->_Ymin; 241 cw = fb->_Xmax - fb->_Xmin; 242 ch = fb->_Ymax - fb->_Ymin; 243 244 if (cw == 0 || ch == 0) 245 return; 246 247 GLuint buf; 248 all = (cw == fb->Width && ch == fb->Height); 249 250 /* Loop over all renderbuffers */ 251 for (buf = 0; buf < BUFFER_COUNT && mask; buf++) { 252 const GLbitfield bufBit = 1 << buf; 253 struct intel_renderbuffer *irb; 254 drm_intel_bo *write_buffer; 255 int x1, y1, x2, y2; 256 uint32_t clear_val; 257 uint32_t BR13, CMD; 258 int pitch, cpp; 259 drm_intel_bo *aper_array[2]; 260 261 if (!(mask & bufBit)) 262 continue; 263 264 /* OK, clear this renderbuffer */ 265 irb = intel_get_renderbuffer(fb, buf); 266 write_buffer = intel_region_buffer(intel, irb->region, 267 all ? INTEL_WRITE_FULL : 268 INTEL_WRITE_PART); 269 x1 = cx + irb->region->draw_x; 270 y1 = cy + irb->region->draw_y; 271 x2 = cx + cw + irb->region->draw_x; 272 y2 = cy + ch + irb->region->draw_y; 273 274 pitch = irb->region->pitch; 275 cpp = irb->region->cpp; 276 277 DBG("%s dst:buf(%p)/%d %d,%d sz:%dx%d\n", 278 __FUNCTION__, 279 irb->region->buffer, (pitch * cpp), 280 x1, y1, x2 - x1, y2 - y1); 281 282 BR13 = br13_for_cpp(cpp) | 0xf0 << 16; 283 CMD = XY_COLOR_BLT_CMD; 284 285 /* Setup the blit command */ 286 if (cpp == 4) { 287 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { 288 if (mask & BUFFER_BIT_DEPTH) 289 CMD |= XY_BLT_WRITE_RGB; 290 if (mask & BUFFER_BIT_STENCIL) 291 CMD |= XY_BLT_WRITE_ALPHA; 292 } else { 293 /* clearing RGBA */ 294 CMD |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; 295 } 296 } 297 298 assert(irb->region->tiling != I915_TILING_Y); 299 300#ifndef I915 301 if (irb->region->tiling != I915_TILING_NONE) { 302 CMD |= XY_DST_TILED; 303 pitch /= 4; 304 } 305#endif 306 BR13 |= (pitch * cpp); 307 308 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) { 309 clear_val = clear_depth; 310 } else { 311 uint8_t clear[4]; 312 GLclampf *color = ctx->Color.ClearColor; 313 314 CLAMPED_FLOAT_TO_UBYTE(clear[0], color[0]); 315 CLAMPED_FLOAT_TO_UBYTE(clear[1], color[1]); 316 CLAMPED_FLOAT_TO_UBYTE(clear[2], color[2]); 317 CLAMPED_FLOAT_TO_UBYTE(clear[3], color[3]); 318 319 switch (irb->Base.Format) { 320 case MESA_FORMAT_ARGB8888: 321 case MESA_FORMAT_XRGB8888: 322 clear_val = PACK_COLOR_8888(clear[3], clear[0], 323 clear[1], clear[2]); 324 break; 325 case MESA_FORMAT_RGB565: 326 clear_val = PACK_COLOR_565(clear[0], clear[1], clear[2]); 327 break; 328 case MESA_FORMAT_ARGB4444: 329 clear_val = PACK_COLOR_4444(clear[3], clear[0], 330 clear[1], clear[2]); 331 break; 332 case MESA_FORMAT_ARGB1555: 333 clear_val = PACK_COLOR_1555(clear[3], clear[0], 334 clear[1], clear[2]); 335 break; 336 case MESA_FORMAT_A8: 337 clear_val = PACK_COLOR_8888(clear[3], clear[3], 338 clear[3], clear[3]); 339 break; 340 default: 341 _mesa_problem(ctx, "Unexpected renderbuffer format: %d\n", 342 irb->Base.Format); 343 clear_val = 0; 344 } 345 } 346 347 assert(x1 < x2); 348 assert(y1 < y2); 349 350 /* do space check before going any further */ 351 aper_array[0] = intel->batch->buf; 352 aper_array[1] = write_buffer; 353 354 if (drm_intel_bufmgr_check_aperture_space(aper_array, 355 ARRAY_SIZE(aper_array)) != 0) { 356 intel_batchbuffer_flush(intel->batch); 357 } 358 359 BEGIN_BATCH(6); 360 OUT_BATCH(CMD); 361 OUT_BATCH(BR13); 362 OUT_BATCH((y1 << 16) | x1); 363 OUT_BATCH((y2 << 16) | x2); 364 OUT_RELOC_FENCED(write_buffer, 365 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 366 0); 367 OUT_BATCH(clear_val); 368 ADVANCE_BATCH(); 369 370 if (intel->always_flush_cache) 371 intel_batchbuffer_emit_mi_flush(intel->batch); 372 373 if (buf == BUFFER_DEPTH || buf == BUFFER_STENCIL) 374 mask &= ~(BUFFER_BIT_DEPTH | BUFFER_BIT_STENCIL); 375 else 376 mask &= ~bufBit; /* turn off bit, for faster loop exit */ 377 } 378} 379 380GLboolean 381intelEmitImmediateColorExpandBlit(struct intel_context *intel, 382 GLuint cpp, 383 GLubyte *src_bits, GLuint src_size, 384 GLuint fg_color, 385 GLshort dst_pitch, 386 drm_intel_bo *dst_buffer, 387 GLuint dst_offset, 388 uint32_t dst_tiling, 389 GLshort x, GLshort y, 390 GLshort w, GLshort h, 391 GLenum logic_op) 392{ 393 int dwords = ALIGN(src_size, 8) / 4; 394 uint32_t opcode, br13, blit_cmd; 395 396 /* Blits are in a different ringbuffer so we don't use them. */ 397 if (intel->gen >= 6) 398 return GL_FALSE; 399 400 if (dst_tiling != I915_TILING_NONE) { 401 if (dst_offset & 4095) 402 return GL_FALSE; 403 if (dst_tiling == I915_TILING_Y) 404 return GL_FALSE; 405 } 406 407 assert( logic_op - GL_CLEAR >= 0 ); 408 assert( logic_op - GL_CLEAR < 0x10 ); 409 assert(dst_pitch > 0); 410 411 if (w < 0 || h < 0) 412 return GL_TRUE; 413 414 dst_pitch *= cpp; 415 416 DBG("%s dst:buf(%p)/%d+%d %d,%d sz:%dx%d, %d bytes %d dwords\n", 417 __FUNCTION__, 418 dst_buffer, dst_pitch, dst_offset, x, y, w, h, src_size, dwords); 419 420 intel_batchbuffer_require_space( intel->batch, 421 (8 * 4) + 422 (3 * 4) + 423 dwords * 4 ); 424 425 opcode = XY_SETUP_BLT_CMD; 426 if (cpp == 4) 427 opcode |= XY_BLT_WRITE_ALPHA | XY_BLT_WRITE_RGB; 428#ifndef I915 429 if (dst_tiling != I915_TILING_NONE) { 430 opcode |= XY_DST_TILED; 431 dst_pitch /= 4; 432 } 433#endif 434 435 br13 = dst_pitch | (translate_raster_op(logic_op) << 16) | (1 << 29); 436 br13 |= br13_for_cpp(cpp); 437 438 blit_cmd = XY_TEXT_IMMEDIATE_BLIT_CMD | XY_TEXT_BYTE_PACKED; /* packing? */ 439 if (dst_tiling != I915_TILING_NONE) 440 blit_cmd |= XY_DST_TILED; 441 442 BEGIN_BATCH(8 + 3); 443 OUT_BATCH(opcode); 444 OUT_BATCH(br13); 445 OUT_BATCH((0 << 16) | 0); /* clip x1, y1 */ 446 OUT_BATCH((100 << 16) | 100); /* clip x2, y2 */ 447 OUT_RELOC_FENCED(dst_buffer, 448 I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER, 449 dst_offset); 450 OUT_BATCH(0); /* bg */ 451 OUT_BATCH(fg_color); /* fg */ 452 OUT_BATCH(0); /* pattern base addr */ 453 454 OUT_BATCH(blit_cmd | ((3 - 2) + dwords)); 455 OUT_BATCH((y << 16) | x); 456 OUT_BATCH(((y + h) << 16) | (x + w)); 457 ADVANCE_BATCH(); 458 459 intel_batchbuffer_data( intel->batch, 460 src_bits, 461 dwords * 4 ); 462 463 intel_batchbuffer_emit_mi_flush(intel->batch); 464 465 return GL_TRUE; 466} 467 468/* We don't have a memmove-type blit like some other hardware, so we'll do a 469 * rectangular blit covering a large space, then emit 1-scanline blit at the 470 * end to cover the last if we need. 471 */ 472void 473intel_emit_linear_blit(struct intel_context *intel, 474 drm_intel_bo *dst_bo, 475 unsigned int dst_offset, 476 drm_intel_bo *src_bo, 477 unsigned int src_offset, 478 unsigned int size) 479{ 480 GLuint pitch, height; 481 GLboolean ok; 482 483 /* Blits are in a different ringbuffer so we don't use them. */ 484 assert(intel->gen < 6); 485 486 /* The pitch given to the GPU must be DWORD aligned, and 487 * we want width to match pitch. Max width is (1 << 15 - 1), 488 * rounding that down to the nearest DWORD is 1 << 15 - 4 489 */ 490 pitch = MIN2(size, (1 << 15) - 4); 491 height = size / pitch; 492 ok = intelEmitCopyBlit(intel, 1, 493 pitch, src_bo, src_offset, I915_TILING_NONE, 494 pitch, dst_bo, dst_offset, I915_TILING_NONE, 495 0, 0, /* src x/y */ 496 0, 0, /* dst x/y */ 497 pitch, height, /* w, h */ 498 GL_COPY); 499 assert(ok); 500 501 src_offset += pitch * height; 502 dst_offset += pitch * height; 503 size -= pitch * height; 504 assert (size < (1 << 15)); 505 assert ((size & 3) == 0); /* Pitch must be DWORD aligned */ 506 if (size != 0) { 507 ok = intelEmitCopyBlit(intel, 1, 508 size, src_bo, src_offset, I915_TILING_NONE, 509 size, dst_bo, dst_offset, I915_TILING_NONE, 510 0, 0, /* src x/y */ 511 0, 0, /* dst x/y */ 512 size, 1, /* w, h */ 513 GL_COPY); 514 assert(ok); 515 } 516} 517