1// Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15#include "Blitter.hpp" 16 17#include "Shader/ShaderCore.hpp" 18#include "Reactor/Reactor.hpp" 19#include "Common/Memory.hpp" 20#include "Common/Debug.hpp" 21 22namespace sw 23{ 24 Blitter::Blitter() 25 { 26 blitCache = new RoutineCache<State>(1024); 27 } 28 29 Blitter::~Blitter() 30 { 31 delete blitCache; 32 } 33 34 void Blitter::clear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) 35 { 36 if(fastClear(pixel, format, dest, dRect, rgbaMask)) 37 { 38 return; 39 } 40 41 sw::Surface *color = sw::Surface::create(1, 1, 1, format, pixel, sw::Surface::bytes(format), sw::Surface::bytes(format)); 42 SliceRectF sRect((float)dRect.x0, (float)dRect.y0, (float)dRect.x1, (float)dRect.y1, 0); 43 blit(color, sRect, dest, dRect, {rgbaMask}); 44 delete color; 45 } 46 47 bool Blitter::fastClear(void *pixel, sw::Format format, Surface *dest, const SliceRect &dRect, unsigned int rgbaMask) 48 { 49 if(format != FORMAT_A32B32G32R32F) 50 { 51 return false; 52 } 53 54 float *color = (float*)pixel; 55 float r = color[0]; 56 float g = color[1]; 57 float b = color[2]; 58 float a = color[3]; 59 60 uint32_t packed; 61 62 switch(dest->getFormat()) 63 { 64 case FORMAT_R5G6B5: 65 if((rgbaMask & 0x7) != 0x7) return false; 66 packed = ((uint16_t)(31 * b + 0.5f) << 0) | 67 ((uint16_t)(63 * g + 0.5f) << 5) | 68 ((uint16_t)(31 * r + 0.5f) << 11); 69 break; 70 case FORMAT_X8B8G8R8: 71 if((rgbaMask & 0x7) != 0x7) return false; 72 packed = ((uint32_t)(255) << 24) | 73 ((uint32_t)(255 * b + 0.5f) << 16) | 74 ((uint32_t)(255 * g + 0.5f) << 8) | 75 ((uint32_t)(255 * r + 0.5f) << 0); 76 break; 77 case FORMAT_A8B8G8R8: 78 if((rgbaMask & 0xF) != 0xF) return false; 79 packed = ((uint32_t)(255 * a + 0.5f) << 24) | 80 ((uint32_t)(255 * b + 0.5f) << 16) | 81 ((uint32_t)(255 * g + 0.5f) << 8) | 82 ((uint32_t)(255 * r + 0.5f) << 0); 83 break; 84 case FORMAT_X8R8G8B8: 85 if((rgbaMask & 0x7) != 0x7) return false; 86 packed = ((uint32_t)(255) << 24) | 87 ((uint32_t)(255 * r + 0.5f) << 16) | 88 ((uint32_t)(255 * g + 0.5f) << 8) | 89 ((uint32_t)(255 * b + 0.5f) << 0); 90 break; 91 case FORMAT_A8R8G8B8: 92 if((rgbaMask & 0xF) != 0xF) return false; 93 packed = ((uint32_t)(255 * a + 0.5f) << 24) | 94 ((uint32_t)(255 * r + 0.5f) << 16) | 95 ((uint32_t)(255 * g + 0.5f) << 8) | 96 ((uint32_t)(255 * b + 0.5f) << 0); 97 break; 98 default: 99 return false; 100 } 101 102 bool useDestInternal = !dest->isExternalDirty(); 103 uint8_t *slice = (uint8_t*)dest->lock(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC, useDestInternal); 104 105 for(int j = 0; j < dest->getSamples(); j++) 106 { 107 uint8_t *d = slice; 108 109 switch(Surface::bytes(dest->getFormat())) 110 { 111 case 2: 112 for(int i = dRect.y0; i < dRect.y1; i++) 113 { 114 sw::clear((uint16_t*)d, packed, dRect.x1 - dRect.x0); 115 d += dest->getPitchB(useDestInternal); 116 } 117 break; 118 case 4: 119 for(int i = dRect.y0; i < dRect.y1; i++) 120 { 121 sw::clear((uint32_t*)d, packed, dRect.x1 - dRect.x0); 122 d += dest->getPitchB(useDestInternal); 123 } 124 break; 125 default: 126 assert(false); 127 } 128 129 slice += dest->getSliceB(useDestInternal); 130 } 131 132 dest->unlock(useDestInternal); 133 134 return true; 135 } 136 137 void Blitter::blit(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options& options) 138 { 139 if(dest->getInternalFormat() == FORMAT_NULL) 140 { 141 return; 142 } 143 144 if(blitReactor(source, sourceRect, dest, destRect, options)) 145 { 146 return; 147 } 148 149 SliceRectF sRect = sourceRect; 150 SliceRect dRect = destRect; 151 152 bool flipX = destRect.x0 > destRect.x1; 153 bool flipY = destRect.y0 > destRect.y1; 154 155 if(flipX) 156 { 157 swap(dRect.x0, dRect.x1); 158 swap(sRect.x0, sRect.x1); 159 } 160 if(flipY) 161 { 162 swap(dRect.y0, dRect.y1); 163 swap(sRect.y0, sRect.y1); 164 } 165 166 source->lockInternal((int)sRect.x0, (int)sRect.y0, sRect.slice, sw::LOCK_READONLY, sw::PUBLIC); 167 dest->lockInternal(dRect.x0, dRect.y0, dRect.slice, sw::LOCK_WRITEONLY, sw::PUBLIC); 168 169 float w = sRect.width() / dRect.width(); 170 float h = sRect.height() / dRect.height(); 171 172 const float xStart = sRect.x0 + 0.5f * w; 173 float y = sRect.y0 + 0.5f * h; 174 175 for(int j = dRect.y0; j < dRect.y1; j++) 176 { 177 float x = xStart; 178 179 for(int i = dRect.x0; i < dRect.x1; i++) 180 { 181 // FIXME: Support RGBA mask 182 dest->copyInternal(source, i, j, x, y, options.filter); 183 184 x += w; 185 } 186 187 y += h; 188 } 189 190 source->unlockInternal(); 191 dest->unlockInternal(); 192 } 193 194 void Blitter::blit3D(Surface *source, Surface *dest) 195 { 196 source->lockInternal(0, 0, 0, sw::LOCK_READONLY, sw::PUBLIC); 197 dest->lockInternal(0, 0, 0, sw::LOCK_WRITEONLY, sw::PUBLIC); 198 199 float w = static_cast<float>(source->getWidth()) / static_cast<float>(dest->getWidth()); 200 float h = static_cast<float>(source->getHeight()) / static_cast<float>(dest->getHeight()); 201 float d = static_cast<float>(source->getDepth()) / static_cast<float>(dest->getDepth()); 202 203 float z = 0.5f * d; 204 for(int k = 0; k < dest->getDepth(); k++) 205 { 206 float y = 0.5f * h; 207 for(int j = 0; j < dest->getHeight(); j++) 208 { 209 float x = 0.5f * w; 210 for(int i = 0; i < dest->getWidth(); i++) 211 { 212 dest->copyInternal(source, i, j, k, x, y, z, true); 213 x += w; 214 } 215 y += h; 216 } 217 z += d; 218 } 219 220 source->unlockInternal(); 221 dest->unlockInternal(); 222 } 223 224 bool Blitter::read(Float4 &c, Pointer<Byte> element, const State &state) 225 { 226 c = Float4(0.0f, 0.0f, 0.0f, 1.0f); 227 228 switch(state.sourceFormat) 229 { 230 case FORMAT_L8: 231 c.xyz = Float(Int(*Pointer<Byte>(element))); 232 c.w = float(0xFF); 233 break; 234 case FORMAT_A8: 235 c.w = Float(Int(*Pointer<Byte>(element))); 236 break; 237 case FORMAT_R8I: 238 case FORMAT_R8_SNORM: 239 c.x = Float(Int(*Pointer<SByte>(element))); 240 c.w = float(0x7F); 241 break; 242 case FORMAT_R8: 243 case FORMAT_R8UI: 244 c.x = Float(Int(*Pointer<Byte>(element))); 245 c.w = float(0xFF); 246 break; 247 case FORMAT_R16I: 248 c.x = Float(Int(*Pointer<Short>(element))); 249 c.w = float(0x7FFF); 250 break; 251 case FORMAT_R16UI: 252 c.x = Float(Int(*Pointer<UShort>(element))); 253 c.w = float(0xFFFF); 254 break; 255 case FORMAT_R32I: 256 c.x = Float(*Pointer<Int>(element)); 257 c.w = float(0x7FFFFFFF); 258 break; 259 case FORMAT_R32UI: 260 c.x = Float(*Pointer<UInt>(element)); 261 c.w = float(0xFFFFFFFF); 262 break; 263 case FORMAT_A8R8G8B8: 264 c = Float4(*Pointer<Byte4>(element)).zyxw; 265 break; 266 case FORMAT_A8B8G8R8I: 267 case FORMAT_A8B8G8R8_SNORM: 268 c = Float4(*Pointer<SByte4>(element)); 269 break; 270 case FORMAT_A8B8G8R8: 271 case FORMAT_A8B8G8R8UI: 272 case FORMAT_SRGB8_A8: 273 c = Float4(*Pointer<Byte4>(element)); 274 break; 275 case FORMAT_X8R8G8B8: 276 c = Float4(*Pointer<Byte4>(element)).zyxw; 277 c.w = float(0xFF); 278 break; 279 case FORMAT_R8G8B8: 280 c.z = Float(Int(*Pointer<Byte>(element + 0))); 281 c.y = Float(Int(*Pointer<Byte>(element + 1))); 282 c.x = Float(Int(*Pointer<Byte>(element + 2))); 283 c.w = float(0xFF); 284 break; 285 case FORMAT_B8G8R8: 286 c.x = Float(Int(*Pointer<Byte>(element + 0))); 287 c.y = Float(Int(*Pointer<Byte>(element + 1))); 288 c.z = Float(Int(*Pointer<Byte>(element + 2))); 289 c.w = float(0xFF); 290 break; 291 case FORMAT_X8B8G8R8I: 292 case FORMAT_X8B8G8R8_SNORM: 293 c = Float4(*Pointer<SByte4>(element)); 294 c.w = float(0x7F); 295 break; 296 case FORMAT_X8B8G8R8: 297 case FORMAT_X8B8G8R8UI: 298 case FORMAT_SRGB8_X8: 299 c = Float4(*Pointer<Byte4>(element)); 300 c.w = float(0xFF); 301 break; 302 case FORMAT_A16B16G16R16I: 303 c = Float4(*Pointer<Short4>(element)); 304 break; 305 case FORMAT_A16B16G16R16: 306 case FORMAT_A16B16G16R16UI: 307 c = Float4(*Pointer<UShort4>(element)); 308 break; 309 case FORMAT_X16B16G16R16I: 310 c = Float4(*Pointer<Short4>(element)); 311 c.w = float(0x7FFF); 312 break; 313 case FORMAT_X16B16G16R16UI: 314 c = Float4(*Pointer<UShort4>(element)); 315 c.w = float(0xFFFF); 316 break; 317 case FORMAT_A32B32G32R32I: 318 c = Float4(*Pointer<Int4>(element)); 319 break; 320 case FORMAT_A32B32G32R32UI: 321 c = Float4(*Pointer<UInt4>(element)); 322 break; 323 case FORMAT_X32B32G32R32I: 324 c = Float4(*Pointer<Int4>(element)); 325 c.w = float(0x7FFFFFFF); 326 break; 327 case FORMAT_X32B32G32R32UI: 328 c = Float4(*Pointer<UInt4>(element)); 329 c.w = float(0xFFFFFFFF); 330 break; 331 case FORMAT_G8R8I: 332 case FORMAT_G8R8_SNORM: 333 c.x = Float(Int(*Pointer<SByte>(element + 0))); 334 c.y = Float(Int(*Pointer<SByte>(element + 1))); 335 c.w = float(0x7F); 336 break; 337 case FORMAT_G8R8: 338 case FORMAT_G8R8UI: 339 c.x = Float(Int(*Pointer<Byte>(element + 0))); 340 c.y = Float(Int(*Pointer<Byte>(element + 1))); 341 c.w = float(0xFF); 342 break; 343 case FORMAT_G16R16I: 344 c.x = Float(Int(*Pointer<Short>(element + 0))); 345 c.y = Float(Int(*Pointer<Short>(element + 2))); 346 c.w = float(0x7FFF); 347 break; 348 case FORMAT_G16R16: 349 case FORMAT_G16R16UI: 350 c.x = Float(Int(*Pointer<UShort>(element + 0))); 351 c.y = Float(Int(*Pointer<UShort>(element + 2))); 352 c.w = float(0xFFFF); 353 break; 354 case FORMAT_G32R32I: 355 c.x = Float(*Pointer<Int>(element + 0)); 356 c.y = Float(*Pointer<Int>(element + 4)); 357 c.w = float(0x7FFFFFFF); 358 break; 359 case FORMAT_G32R32UI: 360 c.x = Float(*Pointer<UInt>(element + 0)); 361 c.y = Float(*Pointer<UInt>(element + 4)); 362 c.w = float(0xFFFFFFFF); 363 break; 364 case FORMAT_A32B32G32R32F: 365 c = *Pointer<Float4>(element); 366 break; 367 case FORMAT_X32B32G32R32F: 368 case FORMAT_X32B32G32R32F_UNSIGNED: 369 case FORMAT_B32G32R32F: 370 c.z = *Pointer<Float>(element + 8); 371 case FORMAT_G32R32F: 372 c.x = *Pointer<Float>(element + 0); 373 c.y = *Pointer<Float>(element + 4); 374 break; 375 case FORMAT_R32F: 376 c.x = *Pointer<Float>(element); 377 break; 378 case FORMAT_R5G6B5: 379 c.x = Float(Int((*Pointer<UShort>(element) & UShort(0xF800)) >> UShort(11))); 380 c.y = Float(Int((*Pointer<UShort>(element) & UShort(0x07E0)) >> UShort(5))); 381 c.z = Float(Int(*Pointer<UShort>(element) & UShort(0x001F))); 382 break; 383 case FORMAT_A2B10G10R10: 384 case FORMAT_A2B10G10R10UI: 385 c.x = Float(Int((*Pointer<UInt>(element) & UInt(0x000003FF)))); 386 c.y = Float(Int((*Pointer<UInt>(element) & UInt(0x000FFC00)) >> 10)); 387 c.z = Float(Int((*Pointer<UInt>(element) & UInt(0x3FF00000)) >> 20)); 388 c.w = Float(Int((*Pointer<UInt>(element) & UInt(0xC0000000)) >> 30)); 389 break; 390 case FORMAT_D16: 391 c.x = Float(Int((*Pointer<UShort>(element)))); 392 break; 393 case FORMAT_D24S8: 394 c.x = Float(Int((*Pointer<UInt>(element)))); 395 break; 396 case FORMAT_D32: 397 c.x = Float(Int((*Pointer<UInt>(element)))); 398 break; 399 case FORMAT_D32F_COMPLEMENTARY: 400 case FORMAT_D32FS8_COMPLEMENTARY: 401 c.x = 1.0f - *Pointer<Float>(element); 402 break; 403 case FORMAT_D32F: 404 case FORMAT_D32FS8: 405 case FORMAT_D32F_LOCKABLE: 406 case FORMAT_D32FS8_TEXTURE: 407 case FORMAT_D32F_SHADOW: 408 case FORMAT_D32FS8_SHADOW: 409 c.x = *Pointer<Float>(element); 410 break; 411 case FORMAT_S8: 412 c.x = Float(Int(*Pointer<Byte>(element))); 413 break; 414 default: 415 return false; 416 } 417 418 return true; 419 } 420 421 bool Blitter::write(Float4 &c, Pointer<Byte> element, const State &state) 422 { 423 bool writeR = state.writeRed; 424 bool writeG = state.writeGreen; 425 bool writeB = state.writeBlue; 426 bool writeA = state.writeAlpha; 427 bool writeRGBA = writeR && writeG && writeB && writeA; 428 429 switch(state.destFormat) 430 { 431 case FORMAT_L8: 432 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); 433 break; 434 case FORMAT_A8: 435 if(writeA) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.w))); } 436 break; 437 case FORMAT_A8R8G8B8: 438 if(writeRGBA) 439 { 440 Short4 c0 = RoundShort4(c.zyxw); 441 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 442 } 443 else 444 { 445 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 446 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 447 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 448 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 449 } 450 break; 451 case FORMAT_A8B8G8R8: 452 case FORMAT_SRGB8_A8: 453 if(writeRGBA) 454 { 455 Short4 c0 = RoundShort4(c); 456 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 457 } 458 else 459 { 460 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } 461 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 462 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 463 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 464 } 465 break; 466 case FORMAT_X8R8G8B8: 467 if(writeRGBA) 468 { 469 Short4 c0 = RoundShort4(c.zyxw) | Short4(0x0000, 0x0000, 0x0000, 0x00FF); 470 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 471 } 472 else 473 { 474 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 475 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 476 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 477 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); } 478 } 479 break; 480 case FORMAT_X8B8G8R8: 481 case FORMAT_SRGB8_X8: 482 if(writeRGBA) 483 { 484 Short4 c0 = RoundShort4(c) | Short4(0x0000, 0x0000, 0x0000, 0x00FF); 485 *Pointer<Byte4>(element) = Byte4(PackUnsigned(c0, c0)); 486 } 487 else 488 { 489 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } 490 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 491 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 492 if(writeA) { *Pointer<Byte>(element + 3) = Byte(0xFF); } 493 } 494 break; 495 case FORMAT_R8G8B8: 496 if(writeR) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.x))); } 497 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 498 if(writeB) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.z))); } 499 break; 500 case FORMAT_B8G8R8: 501 if(writeR) { *Pointer<Byte>(element + 0) = Byte(RoundInt(Float(c.x))); } 502 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 503 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 504 break; 505 case FORMAT_A32B32G32R32F: 506 if(writeRGBA) 507 { 508 *Pointer<Float4>(element) = c; 509 } 510 else 511 { 512 if(writeR) { *Pointer<Float>(element) = c.x; } 513 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 514 if(writeB) { *Pointer<Float>(element + 8) = c.z; } 515 if(writeA) { *Pointer<Float>(element + 12) = c.w; } 516 } 517 break; 518 case FORMAT_X32B32G32R32F: 519 case FORMAT_X32B32G32R32F_UNSIGNED: 520 if(writeA) { *Pointer<Float>(element + 12) = 1.0f; } 521 case FORMAT_B32G32R32F: 522 if(writeR) { *Pointer<Float>(element) = c.x; } 523 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 524 if(writeB) { *Pointer<Float>(element + 8) = c.z; } 525 break; 526 case FORMAT_G32R32F: 527 if(writeR && writeG) 528 { 529 *Pointer<Float2>(element) = Float2(c); 530 } 531 else 532 { 533 if(writeR) { *Pointer<Float>(element) = c.x; } 534 if(writeG) { *Pointer<Float>(element + 4) = c.y; } 535 } 536 break; 537 case FORMAT_R32F: 538 if(writeR) { *Pointer<Float>(element) = c.x; } 539 break; 540 case FORMAT_A8B8G8R8I: 541 case FORMAT_A8B8G8R8_SNORM: 542 if(writeA) { *Pointer<SByte>(element + 3) = SByte(RoundInt(Float(c.w))); } 543 case FORMAT_X8B8G8R8I: 544 case FORMAT_X8B8G8R8_SNORM: 545 if(writeA && (state.destFormat == FORMAT_X8B8G8R8I || state.destFormat == FORMAT_X8B8G8R8_SNORM)) 546 { 547 *Pointer<SByte>(element + 3) = SByte(0x7F); 548 } 549 if(writeB) { *Pointer<SByte>(element + 2) = SByte(RoundInt(Float(c.z))); } 550 case FORMAT_G8R8I: 551 case FORMAT_G8R8_SNORM: 552 if(writeG) { *Pointer<SByte>(element + 1) = SByte(RoundInt(Float(c.y))); } 553 case FORMAT_R8I: 554 case FORMAT_R8_SNORM: 555 if(writeR) { *Pointer<SByte>(element) = SByte(RoundInt(Float(c.x))); } 556 break; 557 case FORMAT_A8B8G8R8UI: 558 if(writeA) { *Pointer<Byte>(element + 3) = Byte(RoundInt(Float(c.w))); } 559 case FORMAT_X8B8G8R8UI: 560 if(writeA && (state.destFormat == FORMAT_X8B8G8R8UI)) 561 { 562 *Pointer<Byte>(element + 3) = Byte(0xFF); 563 } 564 if(writeB) { *Pointer<Byte>(element + 2) = Byte(RoundInt(Float(c.z))); } 565 case FORMAT_G8R8UI: 566 case FORMAT_G8R8: 567 if(writeG) { *Pointer<Byte>(element + 1) = Byte(RoundInt(Float(c.y))); } 568 case FORMAT_R8UI: 569 case FORMAT_R8: 570 if(writeR) { *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); } 571 break; 572 case FORMAT_A16B16G16R16I: 573 if(writeRGBA) 574 { 575 *Pointer<Short4>(element) = Short4(RoundInt(c)); 576 } 577 else 578 { 579 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 580 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 581 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } 582 if(writeA) { *Pointer<Short>(element + 6) = Short(RoundInt(Float(c.w))); } 583 } 584 break; 585 case FORMAT_X16B16G16R16I: 586 if(writeRGBA) 587 { 588 *Pointer<Short4>(element) = Short4(RoundInt(c)); 589 } 590 else 591 { 592 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 593 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 594 if(writeB) { *Pointer<Short>(element + 4) = Short(RoundInt(Float(c.z))); } 595 } 596 if(writeA) { *Pointer<Short>(element + 6) = Short(0x7F); } 597 break; 598 case FORMAT_G16R16I: 599 if(writeR && writeG) 600 { 601 *Pointer<Short2>(element) = Short2(Short4(RoundInt(c))); 602 } 603 else 604 { 605 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 606 if(writeG) { *Pointer<Short>(element + 2) = Short(RoundInt(Float(c.y))); } 607 } 608 break; 609 case FORMAT_R16I: 610 if(writeR) { *Pointer<Short>(element) = Short(RoundInt(Float(c.x))); } 611 break; 612 case FORMAT_A16B16G16R16UI: 613 case FORMAT_A16B16G16R16: 614 if(writeRGBA) 615 { 616 *Pointer<UShort4>(element) = UShort4(RoundInt(c)); 617 } 618 else 619 { 620 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 621 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 622 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } 623 if(writeA) { *Pointer<UShort>(element + 6) = UShort(RoundInt(Float(c.w))); } 624 } 625 break; 626 case FORMAT_X16B16G16R16UI: 627 if(writeRGBA) 628 { 629 *Pointer<UShort4>(element) = UShort4(RoundInt(c)); 630 } 631 else 632 { 633 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 634 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 635 if(writeB) { *Pointer<UShort>(element + 4) = UShort(RoundInt(Float(c.z))); } 636 } 637 if(writeA) { *Pointer<UShort>(element + 6) = UShort(0xFF); } 638 break; 639 case FORMAT_G16R16UI: 640 case FORMAT_G16R16: 641 if(writeR && writeG) 642 { 643 *Pointer<UShort2>(element) = UShort2(UShort4(RoundInt(c))); 644 } 645 else 646 { 647 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 648 if(writeG) { *Pointer<UShort>(element + 2) = UShort(RoundInt(Float(c.y))); } 649 } 650 break; 651 case FORMAT_R16UI: 652 if(writeR) { *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); } 653 break; 654 case FORMAT_A32B32G32R32I: 655 if(writeRGBA) 656 { 657 *Pointer<Int4>(element) = RoundInt(c); 658 } 659 else 660 { 661 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 662 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 663 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } 664 if(writeA) { *Pointer<Int>(element + 12) = RoundInt(Float(c.w)); } 665 } 666 break; 667 case FORMAT_X32B32G32R32I: 668 if(writeRGBA) 669 { 670 *Pointer<Int4>(element) = RoundInt(c); 671 } 672 else 673 { 674 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 675 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 676 if(writeB) { *Pointer<Int>(element + 8) = RoundInt(Float(c.z)); } 677 } 678 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); } 679 break; 680 case FORMAT_G32R32I: 681 if(writeG) { *Pointer<Int>(element + 4) = RoundInt(Float(c.y)); } 682 case FORMAT_R32I: 683 if(writeR) { *Pointer<Int>(element) = RoundInt(Float(c.x)); } 684 break; 685 case FORMAT_A32B32G32R32UI: 686 if(writeRGBA) 687 { 688 *Pointer<UInt4>(element) = UInt4(RoundInt(c)); 689 } 690 else 691 { 692 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 693 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 694 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } 695 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(RoundInt(Float(c.w))); } 696 } 697 break; 698 case FORMAT_X32B32G32R32UI: 699 if(writeRGBA) 700 { 701 *Pointer<UInt4>(element) = UInt4(RoundInt(c)); 702 } 703 else 704 { 705 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 706 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 707 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(RoundInt(Float(c.z))); } 708 } 709 if(writeA) { *Pointer<UInt4>(element + 12) = UInt4(0xFFFFFFFF); } 710 break; 711 case FORMAT_G32R32UI: 712 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(RoundInt(Float(c.y))); } 713 case FORMAT_R32UI: 714 if(writeR) { *Pointer<UInt>(element) = As<UInt>(RoundInt(Float(c.x))); } 715 break; 716 case FORMAT_R5G6B5: 717 if(writeR && writeG && writeB) 718 { 719 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.z)) | 720 (RoundInt(Float(c.y)) << Int(5)) | 721 (RoundInt(Float(c.x)) << Int(11))); 722 } 723 else 724 { 725 unsigned short mask = (writeB ? 0x001F : 0x0000) | (writeG ? 0x07E0 : 0x0000) | (writeR ? 0xF800 : 0x0000); 726 unsigned short unmask = ~mask; 727 *Pointer<UShort>(element) = (*Pointer<UShort>(element) & UShort(unmask)) | 728 (UShort(RoundInt(Float(c.z)) | 729 (RoundInt(Float(c.y)) << Int(5)) | 730 (RoundInt(Float(c.x)) << Int(11))) & UShort(mask)); 731 } 732 break; 733 case FORMAT_A2B10G10R10: 734 case FORMAT_A2B10G10R10UI: 735 if(writeRGBA) 736 { 737 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x)) | 738 (RoundInt(Float(c.y)) << 10) | 739 (RoundInt(Float(c.z)) << 20) | 740 (RoundInt(Float(c.w)) << 30)); 741 } 742 else 743 { 744 unsigned int mask = (writeA ? 0xC0000000 : 0x0000) | 745 (writeB ? 0x3FF00000 : 0x0000) | 746 (writeG ? 0x000FFC00 : 0x0000) | 747 (writeR ? 0x000003FF : 0x0000); 748 unsigned int unmask = ~mask; 749 *Pointer<UInt>(element) = (*Pointer<UInt>(element) & UInt(unmask)) | 750 (UInt(RoundInt(Float(c.x)) | 751 (RoundInt(Float(c.y)) << 10) | 752 (RoundInt(Float(c.z)) << 20) | 753 (RoundInt(Float(c.w)) << 30)) & UInt(mask)); 754 } 755 break; 756 case FORMAT_D16: 757 *Pointer<UShort>(element) = UShort(RoundInt(Float(c.x))); 758 break; 759 case FORMAT_D24S8: 760 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x))); 761 break; 762 case FORMAT_D32: 763 *Pointer<UInt>(element) = UInt(RoundInt(Float(c.x))); 764 break; 765 case FORMAT_D32F_COMPLEMENTARY: 766 case FORMAT_D32FS8_COMPLEMENTARY: 767 *Pointer<Float>(element) = 1.0f - c.x; 768 break; 769 case FORMAT_D32F: 770 case FORMAT_D32FS8: 771 case FORMAT_D32F_LOCKABLE: 772 case FORMAT_D32FS8_TEXTURE: 773 case FORMAT_D32F_SHADOW: 774 case FORMAT_D32FS8_SHADOW: 775 *Pointer<Float>(element) = c.x; 776 break; 777 case FORMAT_S8: 778 *Pointer<Byte>(element) = Byte(RoundInt(Float(c.x))); 779 break; 780 default: 781 return false; 782 } 783 return true; 784 } 785 786 bool Blitter::read(Int4 &c, Pointer<Byte> element, const State &state) 787 { 788 c = Int4(0, 0, 0, 1); 789 790 switch(state.sourceFormat) 791 { 792 case FORMAT_A8B8G8R8I: 793 c = Insert(c, Int(*Pointer<SByte>(element + 3)), 3); 794 case FORMAT_X8B8G8R8I: 795 c = Insert(c, Int(*Pointer<SByte>(element + 2)), 2); 796 case FORMAT_G8R8I: 797 c = Insert(c, Int(*Pointer<SByte>(element + 1)), 1); 798 case FORMAT_R8I: 799 c = Insert(c, Int(*Pointer<SByte>(element)), 0); 800 break; 801 case FORMAT_A8B8G8R8UI: 802 c = Insert(c, Int(*Pointer<Byte>(element + 3)), 3); 803 case FORMAT_X8B8G8R8UI: 804 c = Insert(c, Int(*Pointer<Byte>(element + 2)), 2); 805 case FORMAT_G8R8UI: 806 c = Insert(c, Int(*Pointer<Byte>(element + 1)), 1); 807 case FORMAT_R8UI: 808 c = Insert(c, Int(*Pointer<Byte>(element)), 0); 809 break; 810 case FORMAT_A16B16G16R16I: 811 c = Insert(c, Int(*Pointer<Short>(element + 6)), 3); 812 case FORMAT_X16B16G16R16I: 813 c = Insert(c, Int(*Pointer<Short>(element + 4)), 2); 814 case FORMAT_G16R16I: 815 c = Insert(c, Int(*Pointer<Short>(element + 2)), 1); 816 case FORMAT_R16I: 817 c = Insert(c, Int(*Pointer<Short>(element)), 0); 818 break; 819 case FORMAT_A16B16G16R16UI: 820 c = Insert(c, Int(*Pointer<UShort>(element + 6)), 3); 821 case FORMAT_X16B16G16R16UI: 822 c = Insert(c, Int(*Pointer<UShort>(element + 4)), 2); 823 case FORMAT_G16R16UI: 824 c = Insert(c, Int(*Pointer<UShort>(element + 2)), 1); 825 case FORMAT_R16UI: 826 c = Insert(c, Int(*Pointer<UShort>(element)), 0); 827 break; 828 case FORMAT_A32B32G32R32I: 829 case FORMAT_A32B32G32R32UI: 830 c = *Pointer<Int4>(element); 831 break; 832 case FORMAT_X32B32G32R32I: 833 case FORMAT_X32B32G32R32UI: 834 c = Insert(c, *Pointer<Int>(element + 8), 2); 835 case FORMAT_G32R32I: 836 case FORMAT_G32R32UI: 837 c = Insert(c, *Pointer<Int>(element + 4), 1); 838 case FORMAT_R32I: 839 case FORMAT_R32UI: 840 c = Insert(c, *Pointer<Int>(element), 0); 841 break; 842 default: 843 return false; 844 } 845 846 return true; 847 } 848 849 bool Blitter::write(Int4 &c, Pointer<Byte> element, const State &state) 850 { 851 bool writeR = state.writeRed; 852 bool writeG = state.writeGreen; 853 bool writeB = state.writeBlue; 854 bool writeA = state.writeAlpha; 855 bool writeRGBA = writeR && writeG && writeB && writeA; 856 857 switch(state.destFormat) 858 { 859 case FORMAT_A8B8G8R8I: 860 if(writeA) { *Pointer<SByte>(element + 3) = SByte(Extract(c, 3)); } 861 case FORMAT_X8B8G8R8I: 862 if(writeA && (state.destFormat != FORMAT_A8B8G8R8I)) 863 { 864 *Pointer<SByte>(element + 3) = SByte(0x7F); 865 } 866 if(writeB) { *Pointer<SByte>(element + 2) = SByte(Extract(c, 2)); } 867 case FORMAT_G8R8I: 868 if(writeG) { *Pointer<SByte>(element + 1) = SByte(Extract(c, 1)); } 869 case FORMAT_R8I: 870 if(writeR) { *Pointer<SByte>(element) = SByte(Extract(c, 0)); } 871 break; 872 case FORMAT_A8B8G8R8UI: 873 if(writeA) { *Pointer<Byte>(element + 3) = Byte(Extract(c, 3)); } 874 case FORMAT_X8B8G8R8UI: 875 if(writeA && (state.destFormat != FORMAT_A8B8G8R8UI)) 876 { 877 *Pointer<Byte>(element + 3) = Byte(0xFF); 878 } 879 if(writeB) { *Pointer<Byte>(element + 2) = Byte(Extract(c, 2)); } 880 case FORMAT_G8R8UI: 881 if(writeG) { *Pointer<Byte>(element + 1) = Byte(Extract(c, 1)); } 882 case FORMAT_R8UI: 883 if(writeR) { *Pointer<Byte>(element) = Byte(Extract(c, 0)); } 884 break; 885 case FORMAT_A16B16G16R16I: 886 if(writeA) { *Pointer<Short>(element + 6) = Short(Extract(c, 3)); } 887 case FORMAT_X16B16G16R16I: 888 if(writeA && (state.destFormat != FORMAT_A16B16G16R16I)) 889 { 890 *Pointer<Short>(element + 6) = Short(0x7FFF); 891 } 892 if(writeB) { *Pointer<Short>(element + 4) = Short(Extract(c, 2)); } 893 case FORMAT_G16R16I: 894 if(writeG) { *Pointer<Short>(element + 2) = Short(Extract(c, 1)); } 895 case FORMAT_R16I: 896 if(writeR) { *Pointer<Short>(element) = Short(Extract(c, 0)); } 897 break; 898 case FORMAT_A16B16G16R16UI: 899 if(writeA) { *Pointer<UShort>(element + 6) = UShort(Extract(c, 3)); } 900 case FORMAT_X16B16G16R16UI: 901 if(writeA && (state.destFormat != FORMAT_A16B16G16R16UI)) 902 { 903 *Pointer<UShort>(element + 6) = UShort(0xFFFF); 904 } 905 if(writeB) { *Pointer<UShort>(element + 4) = UShort(Extract(c, 2)); } 906 case FORMAT_G16R16UI: 907 if(writeG) { *Pointer<UShort>(element + 2) = UShort(Extract(c, 1)); } 908 case FORMAT_R16UI: 909 if(writeR) { *Pointer<UShort>(element) = UShort(Extract(c, 0)); } 910 break; 911 case FORMAT_A32B32G32R32I: 912 if(writeRGBA) 913 { 914 *Pointer<Int4>(element) = c; 915 } 916 else 917 { 918 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 919 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 920 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } 921 if(writeA) { *Pointer<Int>(element + 12) = Extract(c, 3); } 922 } 923 break; 924 case FORMAT_X32B32G32R32I: 925 if(writeRGBA) 926 { 927 *Pointer<Int4>(element) = c; 928 } 929 else 930 { 931 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 932 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 933 if(writeB) { *Pointer<Int>(element + 8) = Extract(c, 2); } 934 } 935 if(writeA) { *Pointer<Int>(element + 12) = Int(0x7FFFFFFF); } 936 break; 937 case FORMAT_G32R32I: 938 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 939 if(writeG) { *Pointer<Int>(element + 4) = Extract(c, 1); } 940 break; 941 case FORMAT_R32I: 942 if(writeR) { *Pointer<Int>(element) = Extract(c, 0); } 943 break; 944 case FORMAT_A32B32G32R32UI: 945 if(writeRGBA) 946 { 947 *Pointer<UInt4>(element) = As<UInt4>(c); 948 } 949 else 950 { 951 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 952 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 953 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } 954 if(writeA) { *Pointer<UInt>(element + 12) = As<UInt>(Extract(c, 3)); } 955 } 956 break; 957 case FORMAT_X32B32G32R32UI: 958 if(writeRGBA) 959 { 960 *Pointer<UInt4>(element) = As<UInt4>(c); 961 } 962 else 963 { 964 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 965 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 966 if(writeB) { *Pointer<UInt>(element + 8) = As<UInt>(Extract(c, 2)); } 967 } 968 if(writeA) { *Pointer<UInt>(element + 3) = UInt(0xFFFFFFFF); } 969 break; 970 case FORMAT_G32R32UI: 971 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 972 if(writeG) { *Pointer<UInt>(element + 4) = As<UInt>(Extract(c, 1)); } 973 break; 974 case FORMAT_R32UI: 975 if(writeR) { *Pointer<UInt>(element) = As<UInt>(Extract(c, 0)); } 976 break; 977 default: 978 return false; 979 } 980 981 return true; 982 } 983 984 bool Blitter::GetScale(float4 &scale, Format format) 985 { 986 switch(format) 987 { 988 case FORMAT_L8: 989 case FORMAT_A8: 990 case FORMAT_A8R8G8B8: 991 case FORMAT_X8R8G8B8: 992 case FORMAT_R8: 993 case FORMAT_G8R8: 994 case FORMAT_R8G8B8: 995 case FORMAT_B8G8R8: 996 case FORMAT_X8B8G8R8: 997 case FORMAT_A8B8G8R8: 998 case FORMAT_SRGB8_X8: 999 case FORMAT_SRGB8_A8: 1000 scale = vector(0xFF, 0xFF, 0xFF, 0xFF); 1001 break; 1002 case FORMAT_R8_SNORM: 1003 case FORMAT_G8R8_SNORM: 1004 case FORMAT_X8B8G8R8_SNORM: 1005 case FORMAT_A8B8G8R8_SNORM: 1006 scale = vector(0x7F, 0x7F, 0x7F, 0x7F); 1007 break; 1008 case FORMAT_A16B16G16R16: 1009 scale = vector(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF); 1010 break; 1011 case FORMAT_R8I: 1012 case FORMAT_R8UI: 1013 case FORMAT_G8R8I: 1014 case FORMAT_G8R8UI: 1015 case FORMAT_X8B8G8R8I: 1016 case FORMAT_X8B8G8R8UI: 1017 case FORMAT_A8B8G8R8I: 1018 case FORMAT_A8B8G8R8UI: 1019 case FORMAT_R16I: 1020 case FORMAT_R16UI: 1021 case FORMAT_G16R16: 1022 case FORMAT_G16R16I: 1023 case FORMAT_G16R16UI: 1024 case FORMAT_X16B16G16R16I: 1025 case FORMAT_X16B16G16R16UI: 1026 case FORMAT_A16B16G16R16I: 1027 case FORMAT_A16B16G16R16UI: 1028 case FORMAT_R32I: 1029 case FORMAT_R32UI: 1030 case FORMAT_G32R32I: 1031 case FORMAT_G32R32UI: 1032 case FORMAT_X32B32G32R32I: 1033 case FORMAT_X32B32G32R32UI: 1034 case FORMAT_A32B32G32R32I: 1035 case FORMAT_A32B32G32R32UI: 1036 case FORMAT_A32B32G32R32F: 1037 case FORMAT_X32B32G32R32F: 1038 case FORMAT_X32B32G32R32F_UNSIGNED: 1039 case FORMAT_B32G32R32F: 1040 case FORMAT_G32R32F: 1041 case FORMAT_R32F: 1042 case FORMAT_A2B10G10R10UI: 1043 scale = vector(1.0f, 1.0f, 1.0f, 1.0f); 1044 break; 1045 case FORMAT_R5G6B5: 1046 scale = vector(0x1F, 0x3F, 0x1F, 1.0f); 1047 break; 1048 case FORMAT_A2B10G10R10: 1049 scale = vector(0x3FF, 0x3FF, 0x3FF, 0x03); 1050 break; 1051 case FORMAT_D16: 1052 scale = vector(0xFFFF, 0.0f, 0.0f, 0.0f); 1053 break; 1054 case FORMAT_D24S8: 1055 scale = vector(0xFFFFFF, 0.0f, 0.0f, 0.0f); 1056 break; 1057 case FORMAT_D32: 1058 scale = vector(static_cast<float>(0xFFFFFFFF), 0.0f, 0.0f, 0.0f); 1059 break; 1060 case FORMAT_D32F: 1061 case FORMAT_D32FS8: 1062 case FORMAT_D32F_COMPLEMENTARY: 1063 case FORMAT_D32FS8_COMPLEMENTARY: 1064 case FORMAT_D32F_LOCKABLE: 1065 case FORMAT_D32FS8_TEXTURE: 1066 case FORMAT_D32F_SHADOW: 1067 case FORMAT_D32FS8_SHADOW: 1068 case FORMAT_S8: 1069 scale = vector(1.0f, 1.0f, 1.0f, 1.0f); 1070 break; 1071 default: 1072 return false; 1073 } 1074 1075 return true; 1076 } 1077 1078 bool Blitter::ApplyScaleAndClamp(Float4 &value, const State &state, bool preScaled) 1079 { 1080 float4 scale, unscale; 1081 if(state.clearOperation && 1082 Surface::isNonNormalizedInteger(state.sourceFormat) && 1083 !Surface::isNonNormalizedInteger(state.destFormat)) 1084 { 1085 // If we're clearing a buffer from an int or uint color into a normalized color, 1086 // then the whole range of the int or uint color must be scaled between 0 and 1. 1087 switch(state.sourceFormat) 1088 { 1089 case FORMAT_A32B32G32R32I: 1090 unscale = replicate(static_cast<float>(0x7FFFFFFF)); 1091 break; 1092 case FORMAT_A32B32G32R32UI: 1093 unscale = replicate(static_cast<float>(0xFFFFFFFF)); 1094 break; 1095 default: 1096 return false; 1097 } 1098 } 1099 else if(!GetScale(unscale, state.sourceFormat)) 1100 { 1101 return false; 1102 } 1103 1104 if(!GetScale(scale, state.destFormat)) 1105 { 1106 return false; 1107 } 1108 1109 bool srcSRGB = Surface::isSRGBformat(state.sourceFormat); 1110 bool dstSRGB = Surface::isSRGBformat(state.destFormat); 1111 1112 if(state.convertSRGB && ((srcSRGB && !preScaled) || dstSRGB)) // One of the formats is sRGB encoded. 1113 { 1114 value *= preScaled ? Float4(1.0f / scale.x, 1.0f / scale.y, 1.0f / scale.z, 1.0f / scale.w) : // Unapply scale 1115 Float4(1.0f / unscale.x, 1.0f / unscale.y, 1.0f / unscale.z, 1.0f / unscale.w); // Apply unscale 1116 value = (srcSRGB && !preScaled) ? sRGBtoLinear(value) : LinearToSRGB(value); 1117 value *= Float4(scale.x, scale.y, scale.z, scale.w); // Apply scale 1118 } 1119 else if(unscale != scale) 1120 { 1121 value *= Float4(scale.x / unscale.x, scale.y / unscale.y, scale.z / unscale.z, scale.w / unscale.w); 1122 } 1123 1124 if(state.destFormat == FORMAT_X32B32G32R32F_UNSIGNED) 1125 { 1126 value = Max(value, Float4(0.0f)); // TODO: Only necessary if source is signed. 1127 } 1128 else if(Surface::isFloatFormat(state.sourceFormat) && !Surface::isFloatFormat(state.destFormat)) 1129 { 1130 value = Min(value, Float4(scale.x, scale.y, scale.z, scale.w)); 1131 1132 value = Max(value, Float4(Surface::isUnsignedComponent(state.destFormat, 0) ? 0.0f : -scale.x, 1133 Surface::isUnsignedComponent(state.destFormat, 1) ? 0.0f : -scale.y, 1134 Surface::isUnsignedComponent(state.destFormat, 2) ? 0.0f : -scale.z, 1135 Surface::isUnsignedComponent(state.destFormat, 3) ? 0.0f : -scale.w)); 1136 } 1137 1138 return true; 1139 } 1140 1141 Int Blitter::ComputeOffset(Int &x, Int &y, Int &pitchB, int bytes, bool quadLayout) 1142 { 1143 if(!quadLayout) 1144 { 1145 return y * pitchB + x * bytes; 1146 } 1147 else 1148 { 1149 // (x & ~1) * 2 + (x & 1) == (x - (x & 1)) * 2 + (x & 1) == x * 2 - (x & 1) * 2 + (x & 1) == x * 2 - (x & 1) 1150 return (y & Int(~1)) * pitchB + 1151 ((y & Int(1)) * 2 + x * 2 - (x & Int(1))) * bytes; 1152 } 1153 } 1154 1155 Float4 Blitter::LinearToSRGB(Float4 &c) 1156 { 1157 Float4 lc = Min(c, Float4(0.0031308f)) * Float4(12.92f); 1158 Float4 ec = Float4(1.055f) * power(c, Float4(1.0f / 2.4f)) - Float4(0.055f); 1159 1160 Float4 s = c; 1161 s.xyz = Max(lc, ec); 1162 1163 return s; 1164 } 1165 1166 Float4 Blitter::sRGBtoLinear(Float4 &c) 1167 { 1168 Float4 lc = c * Float4(1.0f / 12.92f); 1169 Float4 ec = power((c + Float4(0.055f)) * Float4(1.0f / 1.055f), Float4(2.4f)); 1170 1171 Int4 linear = CmpLT(c, Float4(0.04045f)); 1172 1173 Float4 s = c; 1174 s.xyz = As<Float4>((linear & As<Int4>(lc)) | (~linear & As<Int4>(ec))); // FIXME: IfThenElse() 1175 1176 return s; 1177 } 1178 1179 Routine *Blitter::generate(const State &state) 1180 { 1181 Function<Void(Pointer<Byte>)> function; 1182 { 1183 Pointer<Byte> blit(function.Arg<0>()); 1184 1185 Pointer<Byte> source = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,source)); 1186 Pointer<Byte> dest = *Pointer<Pointer<Byte>>(blit + OFFSET(BlitData,dest)); 1187 Int sPitchB = *Pointer<Int>(blit + OFFSET(BlitData,sPitchB)); 1188 Int dPitchB = *Pointer<Int>(blit + OFFSET(BlitData,dPitchB)); 1189 1190 Float x0 = *Pointer<Float>(blit + OFFSET(BlitData,x0)); 1191 Float y0 = *Pointer<Float>(blit + OFFSET(BlitData,y0)); 1192 Float w = *Pointer<Float>(blit + OFFSET(BlitData,w)); 1193 Float h = *Pointer<Float>(blit + OFFSET(BlitData,h)); 1194 1195 Int x0d = *Pointer<Int>(blit + OFFSET(BlitData,x0d)); 1196 Int x1d = *Pointer<Int>(blit + OFFSET(BlitData,x1d)); 1197 Int y0d = *Pointer<Int>(blit + OFFSET(BlitData,y0d)); 1198 Int y1d = *Pointer<Int>(blit + OFFSET(BlitData,y1d)); 1199 1200 Int sWidth = *Pointer<Int>(blit + OFFSET(BlitData,sWidth)); 1201 Int sHeight = *Pointer<Int>(blit + OFFSET(BlitData,sHeight)); 1202 1203 bool intSrc = Surface::isNonNormalizedInteger(state.sourceFormat); 1204 bool intDst = Surface::isNonNormalizedInteger(state.destFormat); 1205 bool intBoth = intSrc && intDst; 1206 bool srcQuadLayout = Surface::hasQuadLayout(state.sourceFormat); 1207 bool dstQuadLayout = Surface::hasQuadLayout(state.destFormat); 1208 int srcBytes = Surface::bytes(state.sourceFormat); 1209 int dstBytes = Surface::bytes(state.destFormat); 1210 1211 bool hasConstantColorI = false; 1212 Int4 constantColorI; 1213 bool hasConstantColorF = false; 1214 Float4 constantColorF; 1215 if(state.clearOperation) 1216 { 1217 if(intBoth) // Integer types 1218 { 1219 if(!read(constantColorI, source, state)) 1220 { 1221 return nullptr; 1222 } 1223 hasConstantColorI = true; 1224 } 1225 else 1226 { 1227 if(!read(constantColorF, source, state)) 1228 { 1229 return nullptr; 1230 } 1231 hasConstantColorF = true; 1232 1233 if(!ApplyScaleAndClamp(constantColorF, state)) 1234 { 1235 return nullptr; 1236 } 1237 } 1238 } 1239 1240 Float y = y0; 1241 1242 For(Int j = y0d, j < y1d, j++) 1243 { 1244 Float x = x0; 1245 Pointer<Byte> destLine = dest + (dstQuadLayout ? j & Int(~1) : RValue<Int>(j)) * dPitchB; 1246 1247 For(Int i = x0d, i < x1d, i++) 1248 { 1249 Pointer<Byte> d = destLine + (dstQuadLayout ? (((j & Int(1)) << 1) + (i * 2) - (i & Int(1))) : RValue<Int>(i)) * dstBytes; 1250 1251 if(hasConstantColorI) 1252 { 1253 if(!write(constantColorI, d, state)) 1254 { 1255 return nullptr; 1256 } 1257 } 1258 else if(hasConstantColorF) 1259 { 1260 for(int s = 0; s < state.destSamples; s++) 1261 { 1262 if(!write(constantColorF, d, state)) 1263 { 1264 return nullptr; 1265 } 1266 1267 d += *Pointer<Int>(blit + OFFSET(BlitData, dSliceB)); 1268 } 1269 } 1270 else if(intBoth) // Integer types do not support filtering 1271 { 1272 Int4 color; // When both formats are true integer types, we don't go to float to avoid losing precision 1273 Int X = Int(x); 1274 Int Y = Int(y); 1275 1276 if(state.clampToEdge) 1277 { 1278 X = Clamp(X, 0, sWidth - 1); 1279 Y = Clamp(Y, 0, sHeight - 1); 1280 } 1281 1282 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); 1283 1284 if(!read(color, s, state)) 1285 { 1286 return nullptr; 1287 } 1288 1289 if(!write(color, d, state)) 1290 { 1291 return nullptr; 1292 } 1293 } 1294 else 1295 { 1296 Float4 color; 1297 1298 bool preScaled = false; 1299 if(!state.filter || intSrc) 1300 { 1301 Int X = Int(x); 1302 Int Y = Int(y); 1303 1304 if(state.clampToEdge) 1305 { 1306 X = Clamp(X, 0, sWidth - 1); 1307 Y = Clamp(Y, 0, sHeight - 1); 1308 } 1309 1310 Pointer<Byte> s = source + ComputeOffset(X, Y, sPitchB, srcBytes, srcQuadLayout); 1311 1312 if(!read(color, s, state)) 1313 { 1314 return nullptr; 1315 } 1316 } 1317 else // Bilinear filtering 1318 { 1319 Float X = x; 1320 Float Y = y; 1321 1322 if(state.clampToEdge) 1323 { 1324 X = Min(Max(x, 0.5f), Float(sWidth) - 0.5f); 1325 Y = Min(Max(y, 0.5f), Float(sHeight) - 0.5f); 1326 } 1327 1328 Float x0 = X - 0.5f; 1329 Float y0 = Y - 0.5f; 1330 1331 Int X0 = Max(Int(x0), 0); 1332 Int Y0 = Max(Int(y0), 0); 1333 1334 Int X1 = X0 + 1; 1335 Int Y1 = Y0 + 1; 1336 X1 = IfThenElse(X1 >= sWidth, X0, X1); 1337 Y1 = IfThenElse(Y1 >= sHeight, Y0, Y1); 1338 1339 Pointer<Byte> s00 = source + ComputeOffset(X0, Y0, sPitchB, srcBytes, srcQuadLayout); 1340 Pointer<Byte> s01 = source + ComputeOffset(X1, Y0, sPitchB, srcBytes, srcQuadLayout); 1341 Pointer<Byte> s10 = source + ComputeOffset(X0, Y1, sPitchB, srcBytes, srcQuadLayout); 1342 Pointer<Byte> s11 = source + ComputeOffset(X1, Y1, sPitchB, srcBytes, srcQuadLayout); 1343 1344 Float4 c00; if(!read(c00, s00, state)) return nullptr; 1345 Float4 c01; if(!read(c01, s01, state)) return nullptr; 1346 Float4 c10; if(!read(c10, s10, state)) return nullptr; 1347 Float4 c11; if(!read(c11, s11, state)) return nullptr; 1348 1349 if(state.convertSRGB && Surface::isSRGBformat(state.sourceFormat)) // sRGB -> RGB 1350 { 1351 if(!ApplyScaleAndClamp(c00, state)) return nullptr; 1352 if(!ApplyScaleAndClamp(c01, state)) return nullptr; 1353 if(!ApplyScaleAndClamp(c10, state)) return nullptr; 1354 if(!ApplyScaleAndClamp(c11, state)) return nullptr; 1355 preScaled = true; 1356 } 1357 1358 Float4 fx = Float4(x0 - Float(X0)); 1359 Float4 fy = Float4(y0 - Float(Y0)); 1360 Float4 ix = Float4(1.0f) - fx; 1361 Float4 iy = Float4(1.0f) - fy; 1362 1363 color = (c00 * ix + c01 * fx) * iy + 1364 (c10 * ix + c11 * fx) * fy; 1365 } 1366 1367 if(!ApplyScaleAndClamp(color, state, preScaled)) 1368 { 1369 return nullptr; 1370 } 1371 1372 for(int s = 0; s < state.destSamples; s++) 1373 { 1374 if(!write(color, d, state)) 1375 { 1376 return nullptr; 1377 } 1378 1379 d += *Pointer<Int>(blit + OFFSET(BlitData,dSliceB)); 1380 } 1381 } 1382 1383 if(!state.clearOperation) { x += w; } 1384 } 1385 1386 if(!state.clearOperation) { y += h; } 1387 } 1388 } 1389 1390 return function(L"BlitRoutine"); 1391 } 1392 1393 bool Blitter::blitReactor(Surface *source, const SliceRectF &sourceRect, Surface *dest, const SliceRect &destRect, const Blitter::Options &options) 1394 { 1395 ASSERT(!options.clearOperation || ((source->getWidth() == 1) && (source->getHeight() == 1) && (source->getDepth() == 1))); 1396 1397 Rect dRect = destRect; 1398 RectF sRect = sourceRect; 1399 if(destRect.x0 > destRect.x1) 1400 { 1401 swap(dRect.x0, dRect.x1); 1402 swap(sRect.x0, sRect.x1); 1403 } 1404 if(destRect.y0 > destRect.y1) 1405 { 1406 swap(dRect.y0, dRect.y1); 1407 swap(sRect.y0, sRect.y1); 1408 } 1409 1410 State state(options); 1411 state.clampToEdge = (sourceRect.x0 < 0.0f) || 1412 (sourceRect.y0 < 0.0f) || 1413 (sourceRect.x1 > (float)source->getWidth()) || 1414 (sourceRect.y1 > (float)source->getHeight()); 1415 1416 bool useSourceInternal = !source->isExternalDirty(); 1417 bool useDestInternal = !dest->isExternalDirty(); 1418 bool isStencil = options.useStencil; 1419 1420 state.sourceFormat = isStencil ? source->getStencilFormat() : source->getFormat(useSourceInternal); 1421 state.destFormat = isStencil ? dest->getStencilFormat() : dest->getFormat(useDestInternal); 1422 state.destSamples = dest->getSamples(); 1423 1424 criticalSection.lock(); 1425 Routine *blitRoutine = blitCache->query(state); 1426 1427 if(!blitRoutine) 1428 { 1429 blitRoutine = generate(state); 1430 1431 if(!blitRoutine) 1432 { 1433 criticalSection.unlock(); 1434 return false; 1435 } 1436 1437 blitCache->add(state, blitRoutine); 1438 } 1439 1440 criticalSection.unlock(); 1441 1442 void (*blitFunction)(const BlitData *data) = (void(*)(const BlitData*))blitRoutine->getEntry(); 1443 1444 BlitData data; 1445 1446 bool isRGBA = options.writeMask == 0xF; 1447 bool isEntireDest = dest->isEntire(destRect); 1448 1449 data.source = isStencil ? source->lockStencil(0, 0, 0, sw::PUBLIC) : 1450 source->lock(0, 0, sourceRect.slice, sw::LOCK_READONLY, sw::PUBLIC, useSourceInternal); 1451 data.dest = isStencil ? dest->lockStencil(0, 0, 0, sw::PUBLIC) : 1452 dest->lock(0, 0, destRect.slice, isRGBA ? (isEntireDest ? sw::LOCK_DISCARD : sw::LOCK_WRITEONLY) : sw::LOCK_READWRITE, sw::PUBLIC, useDestInternal); 1453 data.sPitchB = isStencil ? source->getStencilPitchB() : source->getPitchB(useSourceInternal); 1454 data.dPitchB = isStencil ? dest->getStencilPitchB() : dest->getPitchB(useDestInternal); 1455 data.dSliceB = isStencil ? dest->getStencilSliceB() : dest->getSliceB(useDestInternal); 1456 1457 data.w = sRect.width() / dRect.width(); 1458 data.h = sRect.height() / dRect.height(); 1459 data.x0 = sRect.x0 + 0.5f * data.w; 1460 data.y0 = sRect.y0 + 0.5f * data.h; 1461 1462 data.x0d = dRect.x0; 1463 data.x1d = dRect.x1; 1464 data.y0d = dRect.y0; 1465 data.y1d = dRect.y1; 1466 1467 data.sWidth = source->getWidth(); 1468 data.sHeight = source->getHeight(); 1469 1470 blitFunction(&data); 1471 1472 if(isStencil) 1473 { 1474 source->unlockStencil(); 1475 dest->unlockStencil(); 1476 } 1477 else 1478 { 1479 source->unlock(useSourceInternal); 1480 dest->unlock(useDestInternal); 1481 } 1482 1483 return true; 1484 } 1485} 1486