// Surface.cpp revision 75b650f0e501750ae0ba66a435741731905dffc1
1// SwiftShader Software Renderer 2// 3// Copyright(c) 2005-2013 TransGaming Inc. 4// 5// All rights reserved. No part of this software may be copied, distributed, transmitted, 6// transcribed, stored in a retrieval system, translated into any human or computer 7// language by any means, or disclosed to third parties without the explicit written 8// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express 9// or implied, including but not limited to any patent rights, are granted to you. 10// 11 12#include "Surface.hpp" 13 14#include "Color.hpp" 15#include "Context.hpp" 16#include "ETC_Decoder.hpp" 17#include "Renderer.hpp" 18#include "Common/Half.hpp" 19#include "Common/Memory.hpp" 20#include "Common/CPUID.hpp" 21#include "Common/Resource.hpp" 22#include "Common/Debug.hpp" 23#include "Reactor/Reactor.hpp" 24 25#include <xmmintrin.h> 26#include <emmintrin.h> 27 28#undef min 29#undef max 30 31namespace sw 32{ 33 extern bool quadLayoutEnabled; 34 extern bool complementaryDepthBuffer; 35 extern TranscendentalPrecision logPrecision; 36 37 unsigned int *Surface::palette = 0; 38 unsigned int Surface::paletteID = 0; 39 40 void Rect::clip(int minX, int minY, int maxX, int maxY) 41 { 42 x0 = clamp(x0, minX, maxX); 43 y0 = clamp(y0, minY, maxY); 44 x1 = clamp(x1, minX, maxX); 45 y1 = clamp(y1, minY, maxY); 46 } 47 48 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color) 49 { 50 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 51 52 write(element, color); 53 } 54 55 void Surface::Buffer::write(int x, int y, const Color<float> &color) 56 { 57 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 58 59 write(element, color); 60 } 61 62 inline void Surface::Buffer::write(void *element, const Color<float> &color) 63 { 64 switch(format) 65 { 66 case FORMAT_A8: 67 *(unsigned char*)element = unorm<8>(color.a); 68 break; 69 case FORMAT_R8I_SNORM: 70 *(char*)element = snorm<8>(color.r); 71 
break; 72 case FORMAT_R8: 73 *(unsigned char*)element = unorm<8>(color.r); 74 break; 75 case FORMAT_R8I: 76 *(char*)element = scast<8>(color.r); 77 break; 78 case FORMAT_R8UI: 79 *(unsigned char*)element = ucast<8>(color.r); 80 break; 81 case FORMAT_R16I: 82 *(short*)element = scast<16>(color.r); 83 break; 84 case FORMAT_R16UI: 85 *(unsigned short*)element = ucast<16>(color.r); 86 break; 87 case FORMAT_R32I: 88 *(int*)element = static_cast<int>(color.r); 89 break; 90 case FORMAT_R32UI: 91 *(unsigned int*)element = static_cast<unsigned int>(color.r); 92 break; 93 case FORMAT_R3G3B2: 94 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 95 break; 96 case FORMAT_A8R3G3B2: 97 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 98 break; 99 case FORMAT_X4R4G4B4: 100 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 101 break; 102 case FORMAT_A4R4G4B4: 103 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 104 break; 105 case FORMAT_R4G4B4A4: 106 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0); 107 break; 108 case FORMAT_R5G6B5: 109 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0); 110 break; 111 case FORMAT_A1R5G5B5: 112 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 113 break; 114 case FORMAT_R5G5B5A1: 115 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0); 116 break; 117 case FORMAT_X1R5G5B5: 118 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | 
(unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 119 break; 120 case FORMAT_A8R8G8B8: 121 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 122 break; 123 case FORMAT_X8R8G8B8: 124 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 125 break; 126 case FORMAT_A8B8G8R8I_SNORM: 127 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) | 128 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 129 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 130 (static_cast<unsigned int>(snorm<8>(color.r)) << 0); 131 break; 132 case FORMAT_A8B8G8R8: 133 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 134 break; 135 case FORMAT_A8B8G8R8I: 136 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) | 137 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 138 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 139 (static_cast<unsigned int>(scast<8>(color.r)) << 0); 140 break; 141 case FORMAT_A8B8G8R8UI: 142 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 143 break; 144 case FORMAT_X8B8G8R8I_SNORM: 145 *(unsigned int*)element = 0x7F000000 | 146 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 147 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 148 (static_cast<unsigned int>(snorm<8>(color.r)) << 0); 149 break; 150 case FORMAT_X8B8G8R8: 151 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 152 break; 153 case FORMAT_X8B8G8R8I: 154 *(unsigned int*)element = 0x7F000000 | 155 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 156 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 157 (static_cast<unsigned 
int>(scast<8>(color.r)) << 0); 158 case FORMAT_X8B8G8R8UI: 159 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 160 break; 161 case FORMAT_A2R10G10B10: 162 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0); 163 break; 164 case FORMAT_A2B10G10R10: 165 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0); 166 break; 167 case FORMAT_G8R8I_SNORM: 168 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) | 169 (static_cast<unsigned short>(snorm<8>(color.r)) << 0); 170 break; 171 case FORMAT_G8R8: 172 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 173 break; 174 case FORMAT_G8R8I: 175 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) | 176 (static_cast<unsigned short>(scast<8>(color.r)) << 0); 177 break; 178 case FORMAT_G8R8UI: 179 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 180 break; 181 case FORMAT_G16R16: 182 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0); 183 break; 184 case FORMAT_G16R16I: 185 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) | 186 (static_cast<unsigned int>(scast<16>(color.r)) << 0); 187 break; 188 case FORMAT_G16R16UI: 189 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0); 190 break; 191 case FORMAT_G32R32I: 192 case FORMAT_G32R32UI: 193 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 194 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 195 break; 196 case FORMAT_A16B16G16R16: 197 ((unsigned short*)element)[0] = unorm<16>(color.r); 198 ((unsigned short*)element)[1] = unorm<16>(color.g); 199 ((unsigned short*)element)[2] = unorm<16>(color.b); 200 
((unsigned short*)element)[3] = unorm<16>(color.a); 201 break; 202 case FORMAT_A16B16G16R16I: 203 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 204 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 205 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 206 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a)); 207 break; 208 case FORMAT_A16B16G16R16UI: 209 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 210 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 211 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 212 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a)); 213 break; 214 case FORMAT_X16B16G16R16I: 215 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 216 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 217 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 218 break; 219 case FORMAT_X16B16G16R16UI: 220 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 221 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 222 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 223 break; 224 case FORMAT_A32B32G32R32I: 225 case FORMAT_A32B32G32R32UI: 226 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 227 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 228 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b); 229 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a); 230 break; 231 case FORMAT_X32B32G32R32I: 232 case FORMAT_X32B32G32R32UI: 233 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 234 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 235 ((unsigned 
int*)element)[2] = static_cast<unsigned int>(color.b); 236 break; 237 case FORMAT_V8U8: 238 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 239 break; 240 case FORMAT_L6V5U5: 241 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0); 242 break; 243 case FORMAT_Q8W8V8U8: 244 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 245 break; 246 case FORMAT_X8L8V8U8: 247 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 248 break; 249 case FORMAT_V16U16: 250 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0); 251 break; 252 case FORMAT_A2W10V10U10: 253 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0); 254 break; 255 case FORMAT_A16W16V16U16: 256 ((unsigned short*)element)[0] = snorm<16>(color.r); 257 ((unsigned short*)element)[1] = snorm<16>(color.g); 258 ((unsigned short*)element)[2] = snorm<16>(color.b); 259 ((unsigned short*)element)[3] = unorm<16>(color.a); 260 break; 261 case FORMAT_Q16W16V16U16: 262 ((unsigned short*)element)[0] = snorm<16>(color.r); 263 ((unsigned short*)element)[1] = snorm<16>(color.g); 264 ((unsigned short*)element)[2] = snorm<16>(color.b); 265 ((unsigned short*)element)[3] = snorm<16>(color.a); 266 break; 267 case FORMAT_R8G8B8: 268 ((unsigned char*)element)[0] = unorm<8>(color.b); 269 ((unsigned char*)element)[1] = unorm<8>(color.g); 270 ((unsigned char*)element)[2] = unorm<8>(color.r); 271 break; 272 case FORMAT_B8G8R8: 273 ((unsigned char*)element)[0] = unorm<8>(color.r); 274 ((unsigned char*)element)[1] = unorm<8>(color.g); 275 ((unsigned char*)element)[2] = unorm<8>(color.b); 276 break; 277 case FORMAT_R16F: 278 *(half*)element = (half)color.r; 279 break; 280 case FORMAT_A16F: 281 
*(half*)element = (half)color.a; 282 break; 283 case FORMAT_G16R16F: 284 ((half*)element)[0] = (half)color.r; 285 ((half*)element)[1] = (half)color.g; 286 break; 287 case FORMAT_B16G16R16F: 288 ((half*)element)[0] = (half)color.r; 289 ((half*)element)[1] = (half)color.g; 290 ((half*)element)[2] = (half)color.b; 291 break; 292 case FORMAT_A16B16G16R16F: 293 ((half*)element)[0] = (half)color.r; 294 ((half*)element)[1] = (half)color.g; 295 ((half*)element)[2] = (half)color.b; 296 ((half*)element)[3] = (half)color.a; 297 break; 298 case FORMAT_A32F: 299 *(float*)element = color.a; 300 break; 301 case FORMAT_R32F: 302 *(float*)element = color.r; 303 break; 304 case FORMAT_G32R32F: 305 ((float*)element)[0] = color.r; 306 ((float*)element)[1] = color.g; 307 break; 308 case FORMAT_B32G32R32F: 309 ((float*)element)[0] = color.r; 310 ((float*)element)[1] = color.g; 311 ((float*)element)[2] = color.b; 312 break; 313 case FORMAT_A32B32G32R32F: 314 ((float*)element)[0] = color.r; 315 ((float*)element)[1] = color.g; 316 ((float*)element)[2] = color.b; 317 ((float*)element)[3] = color.a; 318 break; 319 case FORMAT_D32F: 320 case FORMAT_D32F_LOCKABLE: 321 case FORMAT_D32FS8_TEXTURE: 322 case FORMAT_D32FS8_SHADOW: 323 *((float*)element) = color.r; 324 break; 325 case FORMAT_D32F_COMPLEMENTARY: 326 *((float*)element) = 1 - color.r; 327 break; 328 case FORMAT_S8: 329 *((unsigned char*)element) = unorm<8>(color.r); 330 break; 331 case FORMAT_L8: 332 *(unsigned char*)element = unorm<8>(color.r); 333 break; 334 case FORMAT_A4L4: 335 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0); 336 break; 337 case FORMAT_L16: 338 *(unsigned short*)element = unorm<16>(color.r); 339 break; 340 case FORMAT_A8L8: 341 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0); 342 break; 343 case FORMAT_L16F: 344 *(half*)element = (half)color.r; 345 break; 346 case FORMAT_A16L16F: 347 ((half*)element)[0] = (half)color.r; 348 ((half*)element)[1] = 
(half)color.a; 349 break; 350 case FORMAT_L32F: 351 *(float*)element = color.r; 352 break; 353 case FORMAT_A32L32F: 354 ((float*)element)[0] = color.r; 355 ((float*)element)[1] = color.a; 356 break; 357 default: 358 ASSERT(false); 359 } 360 } 361 362 Color<float> Surface::Buffer::read(int x, int y, int z) const 363 { 364 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 365 366 return read(element); 367 } 368 369 Color<float> Surface::Buffer::read(int x, int y) const 370 { 371 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 372 373 return read(element); 374 } 375 376 inline Color<float> Surface::Buffer::read(void *element) const 377 { 378 float r = 0.0f; 379 float g = 0.0f; 380 float b = 0.0f; 381 float a = 1.0f; 382 383 switch(format) 384 { 385 case FORMAT_P8: 386 { 387 ASSERT(palette); 388 389 unsigned int abgr = palette[*(unsigned char*)element]; 390 391 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 392 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 393 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 394 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 395 } 396 break; 397 case FORMAT_A8P8: 398 { 399 ASSERT(palette); 400 401 unsigned int bgr = palette[((unsigned char*)element)[0]]; 402 403 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF); 404 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00); 405 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000); 406 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 407 } 408 break; 409 case FORMAT_A8: 410 r = 0; 411 g = 0; 412 b = 0; 413 a = *(unsigned char*)element * (1.0f / 0xFF); 414 break; 415 case FORMAT_R8I_SNORM: 416 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f); 417 break; 418 case FORMAT_R8: 419 r = *(unsigned char*)element * (1.0f / 0xFF); 420 break; 421 case FORMAT_R8I: 422 r = *(signed char*)element; 423 break; 424 case FORMAT_R8UI: 425 r = *(unsigned char*)element; 426 break; 427 case FORMAT_R3G3B2: 428 { 429 unsigned char rgb = *(unsigned char*)element; 430 431 r = (rgb & 
0xE0) * (1.0f / 0xE0); 432 g = (rgb & 0x1C) * (1.0f / 0x1C); 433 b = (rgb & 0x03) * (1.0f / 0x03); 434 } 435 break; 436 case FORMAT_A8R3G3B2: 437 { 438 unsigned short argb = *(unsigned short*)element; 439 440 a = (argb & 0xFF00) * (1.0f / 0xFF00); 441 r = (argb & 0x00E0) * (1.0f / 0x00E0); 442 g = (argb & 0x001C) * (1.0f / 0x001C); 443 b = (argb & 0x0003) * (1.0f / 0x0003); 444 } 445 break; 446 case FORMAT_X4R4G4B4: 447 { 448 unsigned short rgb = *(unsigned short*)element; 449 450 r = (rgb & 0x0F00) * (1.0f / 0x0F00); 451 g = (rgb & 0x00F0) * (1.0f / 0x00F0); 452 b = (rgb & 0x000F) * (1.0f / 0x000F); 453 } 454 break; 455 case FORMAT_A4R4G4B4: 456 { 457 unsigned short argb = *(unsigned short*)element; 458 459 a = (argb & 0xF000) * (1.0f / 0xF000); 460 r = (argb & 0x0F00) * (1.0f / 0x0F00); 461 g = (argb & 0x00F0) * (1.0f / 0x00F0); 462 b = (argb & 0x000F) * (1.0f / 0x000F); 463 } 464 break; 465 case FORMAT_R4G4B4A4: 466 { 467 unsigned short rgba = *(unsigned short*)element; 468 469 r = (rgba & 0xF000) * (1.0f / 0xF000); 470 g = (rgba & 0x0F00) * (1.0f / 0x0F00); 471 b = (rgba & 0x00F0) * (1.0f / 0x00F0); 472 a = (rgba & 0x000F) * (1.0f / 0x000F); 473 } 474 break; 475 case FORMAT_R5G6B5: 476 { 477 unsigned short rgb = *(unsigned short*)element; 478 479 r = (rgb & 0xF800) * (1.0f / 0xF800); 480 g = (rgb & 0x07E0) * (1.0f / 0x07E0); 481 b = (rgb & 0x001F) * (1.0f / 0x001F); 482 } 483 break; 484 case FORMAT_A1R5G5B5: 485 { 486 unsigned short argb = *(unsigned short*)element; 487 488 a = (argb & 0x8000) * (1.0f / 0x8000); 489 r = (argb & 0x7C00) * (1.0f / 0x7C00); 490 g = (argb & 0x03E0) * (1.0f / 0x03E0); 491 b = (argb & 0x001F) * (1.0f / 0x001F); 492 } 493 break; 494 case FORMAT_R5G5B5A1: 495 { 496 unsigned short rgba = *(unsigned short*)element; 497 498 r = (rgba & 0xF800) * (1.0f / 0xF800); 499 g = (rgba & 0x07C0) * (1.0f / 0x07C0); 500 b = (rgba & 0x003E) * (1.0f / 0x003E); 501 a = (rgba & 0x0001) * (1.0f / 0x0001); 502 } 503 break; 504 case FORMAT_X1R5G5B5: 505 { 
506 unsigned short xrgb = *(unsigned short*)element; 507 508 r = (xrgb & 0x7C00) * (1.0f / 0x7C00); 509 g = (xrgb & 0x03E0) * (1.0f / 0x03E0); 510 b = (xrgb & 0x001F) * (1.0f / 0x001F); 511 } 512 break; 513 case FORMAT_A8R8G8B8: 514 { 515 unsigned int argb = *(unsigned int*)element; 516 517 a = (argb & 0xFF000000) * (1.0f / 0xFF000000); 518 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000); 519 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00); 520 b = (argb & 0x000000FF) * (1.0f / 0x000000FF); 521 } 522 break; 523 case FORMAT_X8R8G8B8: 524 { 525 unsigned int xrgb = *(unsigned int*)element; 526 527 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000); 528 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00); 529 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF); 530 } 531 break; 532 case FORMAT_A8B8G8R8I_SNORM: 533 { 534 signed char* abgr = (signed char*)element; 535 536 r = max(abgr[0] * (1.0f / 0x7F), -1.0f); 537 g = max(abgr[1] * (1.0f / 0x7F), -1.0f); 538 b = max(abgr[2] * (1.0f / 0x7F), -1.0f); 539 a = max(abgr[3] * (1.0f / 0x7F), -1.0f); 540 } 541 break; 542 case FORMAT_A8B8G8R8: 543 { 544 unsigned int abgr = *(unsigned int*)element; 545 546 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 547 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 548 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 549 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 550 } 551 break; 552 case FORMAT_A8B8G8R8I: 553 { 554 signed char* abgr = (signed char*)element; 555 556 r = abgr[0]; 557 g = abgr[1]; 558 b = abgr[2]; 559 a = abgr[3]; 560 } 561 break; 562 case FORMAT_A8B8G8R8UI: 563 { 564 unsigned char* abgr = (unsigned char*)element; 565 566 r = abgr[0]; 567 g = abgr[1]; 568 b = abgr[2]; 569 a = abgr[3]; 570 } 571 break; 572 case FORMAT_X8B8G8R8I_SNORM: 573 { 574 signed char* bgr = (signed char*)element; 575 576 r = max(bgr[0] * (1.0f / 0x7F), -1.0f); 577 g = max(bgr[1] * (1.0f / 0x7F), -1.0f); 578 b = max(bgr[2] * (1.0f / 0x7F), -1.0f); 579 } 580 break; 581 case FORMAT_X8B8G8R8: 582 { 583 unsigned int xbgr = 
*(unsigned int*)element; 584 585 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000); 586 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00); 587 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF); 588 } 589 break; 590 case FORMAT_X8B8G8R8I: 591 { 592 signed char* bgr = (signed char*)element; 593 594 r = bgr[0]; 595 g = bgr[1]; 596 b = bgr[2]; 597 } 598 break; 599 case FORMAT_X8B8G8R8UI: 600 { 601 unsigned char* bgr = (unsigned char*)element; 602 603 r = bgr[0]; 604 g = bgr[1]; 605 b = bgr[2]; 606 } 607 break; 608 case FORMAT_G8R8I_SNORM: 609 { 610 signed char* gr = (signed char*)element; 611 612 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00); 613 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF); 614 } 615 break; 616 case FORMAT_G8R8: 617 { 618 unsigned short gr = *(unsigned short*)element; 619 620 g = (gr & 0xFF00) * (1.0f / 0xFF00); 621 r = (gr & 0x00FF) * (1.0f / 0x00FF); 622 } 623 break; 624 case FORMAT_G8R8I: 625 { 626 signed char* gr = (signed char*)element; 627 628 r = gr[0]; 629 g = gr[1]; 630 } 631 break; 632 case FORMAT_G8R8UI: 633 { 634 unsigned char* gr = (unsigned char*)element; 635 636 r = gr[0]; 637 g = gr[1]; 638 } 639 break; 640 case FORMAT_R16I: 641 r = *((short*)element); 642 break; 643 case FORMAT_R16UI: 644 r = *((unsigned short*)element); 645 break; 646 case FORMAT_G16R16I: 647 { 648 short* gr = (short*)element; 649 650 r = gr[0]; 651 g = gr[1]; 652 } 653 break; 654 case FORMAT_G16R16: 655 { 656 unsigned int gr = *(unsigned int*)element; 657 658 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000); 659 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF); 660 } 661 break; 662 case FORMAT_G16R16UI: 663 { 664 unsigned short* gr = (unsigned short*)element; 665 666 r = gr[0]; 667 g = gr[1]; 668 } 669 break; 670 case FORMAT_A2R10G10B10: 671 { 672 unsigned int argb = *(unsigned int*)element; 673 674 a = (argb & 0xC0000000) * (1.0f / 0xC0000000); 675 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000); 676 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00); 677 b = (argb & 0x000003FF) * (1.0f / 0x000003FF); 678 
} 679 break; 680 case FORMAT_A2B10G10R10: 681 { 682 unsigned int abgr = *(unsigned int*)element; 683 684 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000); 685 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000); 686 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00); 687 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF); 688 } 689 break; 690 case FORMAT_A16B16G16R16I: 691 { 692 short* abgr = (short*)element; 693 694 r = abgr[0]; 695 g = abgr[1]; 696 b = abgr[2]; 697 a = abgr[3]; 698 } 699 break; 700 case FORMAT_A16B16G16R16: 701 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF); 702 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF); 703 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF); 704 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 705 break; 706 case FORMAT_A16B16G16R16UI: 707 { 708 unsigned short* abgr = (unsigned short*)element; 709 710 r = abgr[0]; 711 g = abgr[1]; 712 b = abgr[2]; 713 a = abgr[3]; 714 } 715 break; 716 case FORMAT_X16B16G16R16I: 717 { 718 short* bgr = (short*)element; 719 720 r = bgr[0]; 721 g = bgr[1]; 722 b = bgr[2]; 723 } 724 break; 725 case FORMAT_X16B16G16R16UI: 726 { 727 unsigned short* bgr = (unsigned short*)element; 728 729 r = bgr[0]; 730 g = bgr[1]; 731 b = bgr[2]; 732 } 733 break; 734 case FORMAT_A32B32G32R32I: 735 { 736 int* abgr = (int*)element; 737 738 r = static_cast<float>(abgr[0]); 739 g = static_cast<float>(abgr[1]); 740 b = static_cast<float>(abgr[2]); 741 a = static_cast<float>(abgr[3]); 742 } 743 break; 744 case FORMAT_A32B32G32R32UI: 745 { 746 unsigned int* abgr = (unsigned int*)element; 747 748 r = static_cast<float>(abgr[0]); 749 g = static_cast<float>(abgr[1]); 750 b = static_cast<float>(abgr[2]); 751 a = static_cast<float>(abgr[3]); 752 } 753 break; 754 case FORMAT_X32B32G32R32I: 755 { 756 int* bgr = (int*)element; 757 758 r = static_cast<float>(bgr[0]); 759 g = static_cast<float>(bgr[1]); 760 b = static_cast<float>(bgr[2]); 761 } 762 break; 763 case FORMAT_X32B32G32R32UI: 764 { 765 unsigned int* bgr = (unsigned 
int*)element; 766 767 r = static_cast<float>(bgr[0]); 768 g = static_cast<float>(bgr[1]); 769 b = static_cast<float>(bgr[2]); 770 } 771 break; 772 case FORMAT_G32R32I: 773 { 774 int* gr = (int*)element; 775 776 r = static_cast<float>(gr[0]); 777 g = static_cast<float>(gr[1]); 778 } 779 break; 780 case FORMAT_G32R32UI: 781 { 782 unsigned int* gr = (unsigned int*)element; 783 784 r = static_cast<float>(gr[0]); 785 g = static_cast<float>(gr[1]); 786 } 787 break; 788 case FORMAT_R32I: 789 r = static_cast<float>(*((int*)element)); 790 break; 791 case FORMAT_R32UI: 792 r = static_cast<float>(*((unsigned int*)element)); 793 break; 794 case FORMAT_V8U8: 795 { 796 unsigned short vu = *(unsigned short*)element; 797 798 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000); 799 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000); 800 } 801 break; 802 case FORMAT_L6V5U5: 803 { 804 unsigned short lvu = *(unsigned short*)element; 805 806 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000); 807 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000); 808 b = (lvu & 0xFC00) * (1.0f / 0xFC00); 809 } 810 break; 811 case FORMAT_Q8W8V8U8: 812 { 813 unsigned int qwvu = *(unsigned int*)element; 814 815 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 816 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 817 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000); 818 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000); 819 } 820 break; 821 case FORMAT_X8L8V8U8: 822 { 823 unsigned int xlvu = *(unsigned int*)element; 824 825 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 826 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 827 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000); 828 } 829 break; 830 case FORMAT_R8G8B8: 831 r = ((unsigned char*)element)[2] * (1.0f / 0xFF); 832 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 833 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 834 break; 835 case FORMAT_B8G8R8: 836 r = ((unsigned 
char*)element)[0] * (1.0f / 0xFF); 837 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 838 b = ((unsigned char*)element)[2] * (1.0f / 0xFF); 839 break; 840 case FORMAT_V16U16: 841 { 842 unsigned int vu = *(unsigned int*)element; 843 844 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000); 845 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000); 846 } 847 break; 848 case FORMAT_A2W10V10U10: 849 { 850 unsigned int awvu = *(unsigned int*)element; 851 852 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000); 853 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000); 854 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000); 855 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000); 856 } 857 break; 858 case FORMAT_A16W16V16U16: 859 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 860 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 861 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 862 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 863 break; 864 case FORMAT_Q16W16V16U16: 865 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 866 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 867 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 868 a = ((signed short*)element)[3] * (1.0f / 0x7FFF); 869 break; 870 case FORMAT_L8: 871 r = 872 g = 873 b = *(unsigned char*)element * (1.0f / 0xFF); 874 break; 875 case FORMAT_A4L4: 876 { 877 unsigned char al = *(unsigned char*)element; 878 879 r = 880 g = 881 b = (al & 0x0F) * (1.0f / 0x0F); 882 a = (al & 0xF0) * (1.0f / 0xF0); 883 } 884 break; 885 case FORMAT_L16: 886 r = 887 g = 888 b = *(unsigned short*)element * (1.0f / 0xFFFF); 889 break; 890 case FORMAT_A8L8: 891 r = 892 g = 893 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 894 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 895 break; 896 case FORMAT_L16F: 897 r = 898 g = 899 b = *(half*)element; 900 break; 901 case FORMAT_A16L16F: 902 r = 903 g = 904 b = ((half*)element)[0]; 905 a = ((half*)element)[1]; 906 break; 907 case FORMAT_L32F: 908 
r = 909 g = 910 b = *(float*)element; 911 break; 912 case FORMAT_A32L32F: 913 r = 914 g = 915 b = ((float*)element)[0]; 916 a = ((float*)element)[1]; 917 break; 918 case FORMAT_A16F: 919 a = *(half*)element; 920 break; 921 case FORMAT_R16F: 922 r = *(half*)element; 923 break; 924 case FORMAT_G16R16F: 925 r = ((half*)element)[0]; 926 g = ((half*)element)[1]; 927 break; 928 case FORMAT_B16G16R16F: 929 r = ((half*)element)[0]; 930 g = ((half*)element)[1]; 931 b = ((half*)element)[2]; 932 break; 933 case FORMAT_A16B16G16R16F: 934 r = ((half*)element)[0]; 935 g = ((half*)element)[1]; 936 b = ((half*)element)[2]; 937 a = ((half*)element)[3]; 938 break; 939 case FORMAT_A32F: 940 a = *(float*)element; 941 break; 942 case FORMAT_R32F: 943 r = *(float*)element; 944 break; 945 case FORMAT_G32R32F: 946 r = ((float*)element)[0]; 947 g = ((float*)element)[1]; 948 break; 949 case FORMAT_B32G32R32F: 950 r = ((float*)element)[0]; 951 g = ((float*)element)[1]; 952 b = ((float*)element)[2]; 953 break; 954 case FORMAT_A32B32G32R32F: 955 r = ((float*)element)[0]; 956 g = ((float*)element)[1]; 957 b = ((float*)element)[2]; 958 a = ((float*)element)[3]; 959 break; 960 case FORMAT_D32F: 961 case FORMAT_D32F_LOCKABLE: 962 case FORMAT_D32FS8_TEXTURE: 963 case FORMAT_D32FS8_SHADOW: 964 r = *(float*)element; 965 g = r; 966 b = r; 967 a = r; 968 break; 969 case FORMAT_D32F_COMPLEMENTARY: 970 r = 1.0f - *(float*)element; 971 g = r; 972 b = r; 973 a = r; 974 break; 975 case FORMAT_S8: 976 r = *(unsigned char*)element * (1.0f / 0xFF); 977 break; 978 default: 979 ASSERT(false); 980 } 981 982 // if(sRGB) 983 // { 984 // r = sRGBtoLinear(r); 985 // g = sRGBtoLinear(g); 986 // b = sRGBtoLinear(b); 987 // } 988 989 return Color<float>(r, g, b, a); 990 } 991 992 Color<float> Surface::Buffer::sample(float x, float y, float z) const 993 { 994 x -= 0.5f; 995 y -= 0.5f; 996 z -= 0.5f; 997 998 int x0 = clamp((int)x, 0, width - 1); 999 int x1 = (x0 + 1 >= width) ? 
		x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		int z0 = clamp((int)z, 0, depth - 1);
		int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1;

		// The eight texels surrounding the sample point.
		Color<float> c000 = read(x0, y0, z0);
		Color<float> c100 = read(x1, y0, z0);
		Color<float> c010 = read(x0, y1, z0);
		Color<float> c110 = read(x1, y1, z0);
		Color<float> c001 = read(x0, y0, z1);
		Color<float> c101 = read(x1, y0, z1);
		Color<float> c011 = read(x0, y1, z1);
		Color<float> c111 = read(x1, y1, z1);

		// Fractional position within the texel cell on each axis.
		float fx = x - x0;
		float fy = y - y0;
		float fz = z - z0;

		// Trilinear blend: each corner weighted by the opposite volume fraction.
		c000 *= (1 - fx) * (1 - fy) * (1 - fz);
		c100 *= fx * (1 - fy) * (1 - fz);
		c010 *= (1 - fx) * fy * (1 - fz);
		c110 *= fx * fy * (1 - fz);
		c001 *= (1 - fx) * (1 - fy) * fz;
		c101 *= fx * (1 - fy) * fz;
		c011 *= (1 - fx) * fy * fz;
		c111 *= fx * fy * fz;

		return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111;
	}

	// Bilinearly filtered read at floating-point coordinates (texel centers at
	// +0.5). Coordinates clamp to the buffer edges, and the +1 neighbor is
	// clamped back onto the last texel at the border.
	Color<float> Surface::Buffer::sample(float x, float y) const
	{
		x -= 0.5f;
		y -= 0.5f;

		int x0 = clamp((int)x, 0, width - 1);
		int x1 = (x0 + 1 >= width) ? x0 : x0 + 1;

		int y0 = clamp((int)y, 0, height - 1);
		int y1 = (y0 + 1 >= height) ? y0 : y0 + 1;

		// The four texels surrounding the sample point.
		Color<float> c00 = read(x0, y0);
		Color<float> c10 = read(x1, y0);
		Color<float> c01 = read(x0, y1);
		Color<float> c11 = read(x1, y1);

		// Fractional position within the texel cell.
		float fx = x - x0;
		float fy = y - y0;

		// Bilinear blend of the four corners.
		c00 *= (1 - fx) * (1 - fy);
		c10 *= fx * (1 - fy);
		c01 *= (1 - fx) * fy;
		c11 *= fx * fy;

		return c00 + c10 + c01 + c11;
	}

	// Records the lock mode and returns a pointer to the texel at (x, y, z).
	// For block-compressed formats the returned pointer addresses the block
	// containing (x, y), using each format's block footprint and block size.
	void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock)
	{
		this->lock = lock;

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirty = true;   // Any writable lock marks this buffer as modified
			break;
		default:
			ASSERT(false);
		}

		if(buffer)
		{
			switch(format)
			{
#if S3TC_SUPPORT
			case FORMAT_DXT1:
#endif
			case FORMAT_ATI1:
			case FORMAT_ETC1:
			case FORMAT_R11_EAC:
			case FORMAT_SIGNED_R11_EAC:
			case FORMAT_RGB8_ETC2:
			case FORMAT_SRGB8_ETC2:
			case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
				// 64-bit (8-byte) 4x4 blocks
				return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RG11_EAC:
			case FORMAT_SIGNED_RG11_EAC:
			case FORMAT_RGBA8_ETC2_EAC:
			case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
			case FORMAT_RGBA_ASTC_4x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
				// 128-bit (16-byte) 4x4 blocks
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			// ASTC: 16-byte blocks with a per-format WxH texel footprint
			case FORMAT_RGBA_ASTC_5x4_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_5x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_6x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_6x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_8x8_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
				return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x5_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x6_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x8_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_10x10_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
				return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_12x10_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
				return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB;
			case FORMAT_RGBA_ASTC_12x12_KHR:
			case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
				return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB;
#if S3TC_SUPPORT
			case FORMAT_DXT3:
			case FORMAT_DXT5:
#endif
			case FORMAT_ATI2:
				// 128-bit (16-byte) 4x4 blocks
				return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB;
			default:
				// Uncompressed formats: plain linear addressing
				return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB;
			}
		}

		return 0;
	}

	// Releases a lock taken by lockRect().
	void Surface::Buffer::unlockRect()
	{
		lock = LOCK_UNLOCKED;
	}

	// Wraps caller-provided memory ('pixels') as a surface. The external buffer
	// is not owned (ownExternal = false); the internal buffer, in the selected
	// internal format, is allocated lazily on first lockInternal().
	Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false)
	{
		resource = new Resource(0);
		hasParent = false;
		ownExternal = false;   // 'pixels' belongs to the caller
		depth = max(1, depth);

		external.buffer = pixels;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = pitch;   // Caller-specified strides, not computed
		external.pitchP = pitch / external.bytes;
		external.sliceB = slice;
		external.sliceP = slice / external.bytes;
		external.lock = LOCK_UNLOCKED;
		external.dirty = true;   // Caller data must be propagated to internal on first use

		internal.buffer = 0;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = pitchB(internal.width, internal.format, false);
		internal.pitchP = pitchP(internal.width, internal.format, false);
		internal.sliceB = sliceB(internal.width, internal.height, internal.format, false);
		internal.sliceP = sliceP(internal.width, internal.height, internal.format, false);
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = 0;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.format = FORMAT_S8;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, stencil.format, false);
		stencil.pitchP = pitchP(stencil.width, stencil.format, false);
		stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false);
		stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false);
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyMipmaps = true;
		paletteUsed = 0;
	}
	// Creates a surface optionally attached to a parent texture resource. All
	// buffers are allocated lazily. Render-target padding is applied to the
	// external buffer only when this surface is not part of a texture.
	Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget) : lockable(lockable), renderTarget(renderTarget)
	{
		resource = texture ? texture : new Resource(0);
		hasParent = texture != 0;
		ownExternal = true;   // We allocate (and must free) the external buffer
		depth = max(1, depth);

		external.buffer = 0;
		external.width = width;
		external.height = height;
		external.depth = depth;
		external.format = format;
		external.bytes = bytes(external.format);
		external.pitchB = pitchB(external.width, external.format, renderTarget && !texture);
		external.pitchP = pitchP(external.width, external.format, renderTarget && !texture);
		external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture);
		external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture);
		external.lock = LOCK_UNLOCKED;
		external.dirty = false;

		internal.buffer = 0;
		internal.width = width;
		internal.height = height;
		internal.depth = depth;
		internal.format = selectInternalFormat(format);
		internal.bytes = bytes(internal.format);
		internal.pitchB = pitchB(internal.width, internal.format, renderTarget);
		internal.pitchP = pitchP(internal.width, internal.format, renderTarget);
		internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget);
		internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget);
		internal.lock = LOCK_UNLOCKED;
		internal.dirty = false;

		stencil.buffer = 0;
		stencil.width = width;
		stencil.height = height;
		stencil.depth = depth;
		stencil.format = FORMAT_S8;
		stencil.bytes = bytes(stencil.format);
		stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget);
		stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget);
		stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget);
		stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget);
		stencil.lock = LOCK_UNLOCKED;
		stencil.dirty = false;

		dirtyMipmaps = true;
		paletteUsed = 0;
	}

	Surface::~Surface()
	{
		// Synchronize so we can deallocate the buffers below
		resource->lock(DESTRUCT);
		resource->unlock();

		// Only destroy the resource if we created it ourselves (no parent texture).
		if(!hasParent)
		{
			resource->destruct();
		}

		if(ownExternal)
		{
			deallocate(external.buffer);
		}

		// internal may alias external when the formats are identical; avoid double-free.
		if(internal.buffer != external.buffer)
		{
			deallocate(internal.buffer);
		}

		deallocate(stencil.buffer);

		external.buffer = 0;
		internal.buffer = 0;
		stencil.buffer = 0;
	}

	// Locks the external (client-format) buffer, allocating it on first use and
	// pulling down any pending changes from the internal buffer. Writable locks
	// mark the mipmap chain dirty.
	void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
	{
		resource->lock(client);

		if(!external.buffer)
		{
			// Share storage with internal when the formats match; otherwise allocate.
			if(internal.buffer && identicalFormats())
			{
				external.buffer = internal.buffer;
			}
			else
			{
				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
			}
		}

		if(internal.dirty)
		{
			// Propagate internal changes unless the caller will overwrite everything.
			if(lock != LOCK_DISCARD)
			{
				update(external, internal);
			}

			internal.dirty = false;
		}

		switch(lock)
		{
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyMipmaps = true;
			break;
		default:
			ASSERT(false);
		}

		return external.lockRect(x, y, z, lock);
	}

	void Surface::unlockExternal()
	{
		resource->unlock();

		external.unlockRect();
	}

	// Locks the internal (native-format) buffer, allocating it on first use and
	// converting from the external buffer when it holds newer data (or when the
	// palette has changed for palettized formats).
	void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
	{
		if(lock != LOCK_UNLOCKED)
		{
			resource->lock(client);
		}

		if(!internal.buffer)
		{
			// Share storage with external when the formats match; otherwise allocate.
			if(external.buffer && identicalFormats())
			{
				internal.buffer = external.buffer;
			}
			else
			{
				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
			}
		}

		// FIXME: WHQL requires conversion to lower external precision and back
		if(logPrecision >= WHQL)
		{
			if(internal.dirty && renderTarget && internal.format != external.format)
			{
				if(lock != LOCK_DISCARD)
				{
					switch(external.format)
					{
					// Round-trip through the external format to drop precision.
					case FORMAT_R3G3B2:
					case FORMAT_A8R3G3B2:
					case FORMAT_A1R5G5B5:
					case FORMAT_A2R10G10B10:
					case FORMAT_A2B10G10R10:
						lockExternal(0, 0, 0, LOCK_READWRITE, client);
						unlockExternal();
						break;
					default:
						// Difference passes WHQL
						break;
					}
				}
			}
		}

		if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
		{
			if(lock != LOCK_DISCARD)
			{
				update(internal, external);
			}

			external.dirty = false;
			paletteUsed = Surface::paletteID;
		}

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyMipmaps = true;
			break;
		default:
			ASSERT(false);
		}

		if(lock == LOCK_READONLY && client == PUBLIC)
		{
			resolve();
		}

		return internal.lockRect(x, y, z, lock);
	}

	void Surface::unlockInternal()
	{
		resource->unlock();

		internal.unlockRect();
	}

	// Locks the stencil plane (allocated lazily) of layer 'front'.
	void *Surface::lockStencil(int front, Accessor client)
	{
		resource->lock(client);

		if(!stencil.buffer)
		{
			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
		}

		return stencil.lockRect(0, 0, front, LOCK_READWRITE);   // FIXME
	}

	void Surface::unlockStencil()
	{
		resource->unlock();

		stencil.unlockRect();
	}

	// Returns the per-pixel storage size in bytes for 'format' (see per-case
	// notes for block-compressed formats, which report a sub-block quantity).
	int
	Surface::bytes(Format format)
	{
		switch(format)
		{
		case FORMAT_NULL: return 0;
		case FORMAT_P8: return 1;
		case FORMAT_A8P8: return 2;
		case FORMAT_A8: return 1;
		case FORMAT_R8I: return 1;
		case FORMAT_R8: return 1;
		case FORMAT_R3G3B2: return 1;
		case FORMAT_R16I: return 2;
		case FORMAT_R16UI: return 2;
		case FORMAT_A8R3G3B2: return 2;
		case FORMAT_R5G6B5: return 2;
		case FORMAT_A1R5G5B5: return 2;
		case FORMAT_X1R5G5B5: return 2;
		case FORMAT_R5G5B5A1: return 2;
		case FORMAT_X4R4G4B4: return 2;
		case FORMAT_A4R4G4B4: return 2;
		case FORMAT_R4G4B4A4: return 2;
		case FORMAT_R8G8B8: return 3;
		case FORMAT_B8G8R8: return 3;
		case FORMAT_R32I: return 4;
		case FORMAT_R32UI: return 4;
		case FORMAT_X8R8G8B8: return 4;
	//	case FORMAT_X8G8R8B8Q: return 4;
		case FORMAT_A8R8G8B8: return 4;
	//	case FORMAT_A8G8R8B8Q: return 4;
		case FORMAT_X8B8G8R8I: return 4;
		case FORMAT_X8B8G8R8: return 4;
		case FORMAT_A8B8G8R8I: return 4;
		case FORMAT_R8UI: return 1;
		case FORMAT_G8R8UI: return 2;
		case FORMAT_X8B8G8R8UI: return 4;
		case FORMAT_A8B8G8R8UI: return 4;
		case FORMAT_A8B8G8R8: return 4;
		case FORMAT_R8I_SNORM: return 1;
		case FORMAT_G8R8I_SNORM: return 2;
		case FORMAT_X8B8G8R8I_SNORM: return 4;
		case FORMAT_A8B8G8R8I_SNORM: return 4;
		case FORMAT_A2R10G10B10: return 4;
		case FORMAT_A2B10G10R10: return 4;
		case FORMAT_G8R8I: return 2;
		case FORMAT_G8R8: return 2;
		case FORMAT_G16R16I: return 4;
		case FORMAT_G16R16UI: return 4;
		case FORMAT_G16R16: return 4;
		case FORMAT_G32R32I: return 8;
		case FORMAT_G32R32UI: return 8;
		case FORMAT_X16B16G16R16I: return 8;
		case FORMAT_X16B16G16R16UI: return 8;
		case FORMAT_A16B16G16R16I: return 8;
		case FORMAT_A16B16G16R16UI: return 8;
		case FORMAT_A16B16G16R16: return 8;
		case FORMAT_X32B32G32R32I: return 16;
		case FORMAT_X32B32G32R32UI: return 16;
		case FORMAT_A32B32G32R32I: return 16;
		case FORMAT_A32B32G32R32UI: return 16;
		// Compressed formats
#if S3TC_SUPPORT
		case FORMAT_DXT1: return 2;   // Column of four pixels
		case FORMAT_DXT3: return 4;   // Column of four pixels
		case FORMAT_DXT5: return 4;   // Column of four pixels
#endif
		case FORMAT_ATI1: return 2;   // Column of four pixels
		case FORMAT_ATI2: return 4;   // Column of four pixels
		case FORMAT_ETC1: return 2;   // Column of four pixels
		case FORMAT_R11_EAC: return 2;
		case FORMAT_SIGNED_R11_EAC: return 2;
		case FORMAT_RG11_EAC: return 4;
		case FORMAT_SIGNED_RG11_EAC: return 4;
		case FORMAT_RGB8_ETC2: return 2;
		case FORMAT_SRGB8_ETC2: return 2;
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_RGBA8_ETC2_EAC: return 4;
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0;   // FIXME
		// Bumpmap formats
		case FORMAT_V8U8: return 2;
		case FORMAT_L6V5U5: return 2;
		case FORMAT_Q8W8V8U8: return 4;
		case FORMAT_X8L8V8U8: return 4;
		case FORMAT_A2W10V10U10: return 4;
		case FORMAT_V16U16: return 4;
		case FORMAT_A16W16V16U16: return 8;
		case FORMAT_Q16W16V16U16: return 8;
		// Luminance formats
		case FORMAT_L8: return 1;
		case FORMAT_A4L4: return 1;
		case FORMAT_L16: return 2;
		case FORMAT_A8L8: return 2;
		case FORMAT_L16F: return 2;
		case FORMAT_A16L16F: return 4;
		case FORMAT_L32F: return 4;
		case FORMAT_A32L32F: return 8;
		// Floating-point formats
		case FORMAT_A16F: return 2;
		case FORMAT_R16F: return 2;
		case FORMAT_G16R16F: return 4;
		case FORMAT_B16G16R16F: return 6;
		case FORMAT_A16B16G16R16F: return 8;
		case FORMAT_A32F: return 4;
		case FORMAT_R32F: return 4;
		case FORMAT_G32R32F: return 8;
		case FORMAT_B32G32R32F: return 12;
		case FORMAT_A32B32G32R32F: return 16;
		// Depth/stencil formats
		case FORMAT_D16: return 2;
		case FORMAT_D32: return 4;
		case FORMAT_D24X8: return 4;
		case FORMAT_D24S8: return 4;
		case FORMAT_D24FS8: return 4;
		case FORMAT_D32F: return 4;
		case FORMAT_D32F_COMPLEMENTARY: return 4;
		case FORMAT_D32F_LOCKABLE: return 4;
		case FORMAT_D32FS8_TEXTURE: return 4;
		case FORMAT_D32FS8_SHADOW: return 4;
		case FORMAT_DF24S8: return 4;
		case FORMAT_DF16S8: return 2;
		case FORMAT_INTZ: return 4;
		case FORMAT_S8: return 1;
		case FORMAT_YV12_BT601: return 1;   // Y plane only
		case FORMAT_YV12_BT709: return 1;   // Y plane only
		case FORMAT_YV12_JFIF: return 1;    // Y plane only
		default:
			ASSERT(false);
		}

		return 0;
	}

	// Row pitch in bytes for a surface of the given width and format. Render
	// targets and depth/stencil surfaces pad the width to an even count;
	// block-compressed formats compute the pitch per row of blocks.
	int Surface::pitchB(int width, Format format, bool
target)
	{
		if(target || isDepth(format) || isStencil(format))
		{
			width = align(width, 2);   // Pad to an even width
		}

		switch(format)
		{
#if S3TC_SUPPORT
		case FORMAT_DXT1:
#endif
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
			return 8 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per 4 rows
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
			return 16 * ((width + 4) / 5);   // 16-byte blocks, 5 texels wide
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
			return 16 * ((width + 5) / 6);   // 16-byte blocks, 6 texels wide
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
			return 16 * ((width + 7) / 8);   // 16-byte blocks, 8 texels wide
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
			return 16 * ((width + 9) / 10);  // 16-byte blocks, 10 texels wide
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return 16 * ((width + 11) / 12); // 16-byte blocks, 12 texels wide
#if S3TC_SUPPORT
		case FORMAT_DXT3:
		case FORMAT_DXT5:
			return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
#endif
		case FORMAT_ATI1:
			return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
		case FORMAT_ATI2:
			return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
		case FORMAT_YV12_BT601:
		case FORMAT_YV12_BT709:
		case FORMAT_YV12_JFIF:
			return align(width, 16);
		default:
			return bytes(format) * width;
		}
	}

	// Row pitch in pixels; zero for formats without a per-pixel byte size.
	int Surface::pitchP(int width, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ? pitchB(width, format, target) / B : 0;
	}

	// Slice (depth layer) size in bytes. Render targets and depth/stencil
	// surfaces pad the height to an even count; block-compressed formats
	// multiply the pitch by the number of block rows.
	int Surface::sliceB(int width, int height, Format format, bool target)
	{
		if(target || isDepth(format) || isStencil(format))
		{
			height = ((height + 1) & ~1);   // Pad to an even height
		}

		switch(format)
		{
#if S3TC_SUPPORT
		case FORMAT_DXT1:
		case FORMAT_DXT3:
		case FORMAT_DXT5:
#endif
		case FORMAT_ETC1:
		case FORMAT_R11_EAC:
		case FORMAT_SIGNED_R11_EAC:
		case FORMAT_RG11_EAC:
		case FORMAT_SIGNED_RG11_EAC:
		case FORMAT_RGB8_ETC2:
		case FORMAT_SRGB8_ETC2:
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
		case FORMAT_RGBA8_ETC2_EAC:
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
			return pitchB(width, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
			return pitchB(width, format, target) * ((height + 4) / 5);   // Pitch computed per 5 rows
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
			return pitchB(width, format, target) * ((height + 5) / 6);   // Pitch computed per 6 rows
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
			return pitchB(width, format, target) * ((height + 7) / 8);   // Pitch computed per 8 rows
		case FORMAT_RGBA_ASTC_10x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		case FORMAT_RGBA_ASTC_12x10_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
			return pitchB(width, format, target) * ((height + 9) / 10);  // Pitch computed per 10 rows
		case FORMAT_RGBA_ASTC_12x12_KHR:
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
			return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows
		case FORMAT_ATI1:
		case FORMAT_ATI2:
		default:
			return pitchB(width, format, target) * height;   // Pitch computed per row
		}
	}

	// Slice size in pixels; zero for formats without a per-pixel byte size.
	int Surface::sliceP(int width, int height, Format format, bool target)
	{
		int B = bytes(format);

		return B > 0 ?
sliceB(width, height, format, target) / B : 0;
	}

	// Propagates 'source' into 'destination', decoding compressed, packed and
	// palettized source formats; everything else goes through genericUpdate().
	// No-op when both buffers share the same storage.
	void Surface::update(Buffer &destination, Buffer &source)
	{
	//	ASSERT(source.lock != LOCK_UNLOCKED);
	//	ASSERT(destination.lock != LOCK_UNLOCKED);

		if(destination.buffer != source.buffer)
		{
			ASSERT(source.dirty && !destination.dirty);

			switch(source.format)
			{
			case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break;       // FIXME: Check destination format
			case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break;   // FIXME: Check destination format
			case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break;   // FIXME: Check destination format
			case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break;   // FIXME: Check destination format
			case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break;   // FIXME: Check destination format
			case FORMAT_P8: decodeP8(destination, source); break;               // FIXME: Check destination format
#if S3TC_SUPPORT
			case FORMAT_DXT1: decodeDXT1(destination, source); break;           // FIXME: Check destination format
			case FORMAT_DXT3: decodeDXT3(destination, source); break;           // FIXME: Check destination format
			case FORMAT_DXT5: decodeDXT5(destination, source); break;           // FIXME: Check destination format
#endif
			case FORMAT_ATI1: decodeATI1(destination, source); break;           // FIXME: Check destination format
			case FORMAT_ATI2: decodeATI2(destination, source); break;           // FIXME: Check destination format
			case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break;          // FIXME: Check destination format
			case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break;    // FIXME: Check destination format
			case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break;         // FIXME: Check destination format
			case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break;   // FIXME: Check destination format
			case FORMAT_ETC1:
			case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break;       // FIXME: Check destination format
			case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break;       // FIXME: Check destination format
			case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break;    // FIXME: Check destination format
			case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break;    // FIXME: Check destination format
			case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break;          // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break;    // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break;     // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break;   // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break;   // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break;   // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format
			case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break;     // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break;   // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break;   // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break;   // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format
			case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format
			default: genericUpdate(destination, source); break;
			}
		}
	}

	// Generic copy/convert over the overlapping extent of the two buffers.
	// Same-format rows are block-copied; otherwise each pixel is converted
	// through Color<float> via Buffer::read()/write().
	void Surface::genericUpdate(Buffer &destination, Buffer &source)
	{
		unsigned char *sourceSlice = (unsigned char*)source.buffer;
		unsigned char *destinationSlice = (unsigned char*)destination.buffer;

		// Only the region present in both buffers is copied.
		int depth = min(destination.depth, source.depth);
		int height = min(destination.height, source.height);
		int width = min(destination.width, source.width);
		int rowBytes = width * source.bytes;

		for(int z = 0; z < depth; z++)
		{
			unsigned char *sourceRow = sourceSlice;
			unsigned char *destinationRow = destinationSlice;

			for(int y = 0; y < height; y++)
			{
				if(source.format == destination.format)
				{
					memcpy(destinationRow, sourceRow, rowBytes);
				}
				else
				{
					unsigned char *sourceElement = sourceRow;
					unsigned char *destinationElement = destinationRow;

					for(int x = 0; x < width; x++)
					{
						Color<float> color = source.read(sourceElement);
						destination.write(destinationElement, color);

						sourceElement += source.bytes;
						destinationElement += destination.bytes;
					}
				}

				sourceRow += source.pitchB;
				destinationRow += destination.pitchB;
			}

			sourceSlice += source.sliceB;
			destinationSlice += destination.sliceB;
		}
	}

	// Expands 24-bit B,G,R byte triplets into 0xFFRRGGBB words (opaque alpha).
	void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
	{
		unsigned char *sourceSlice = (unsigned char*)source.buffer;
		unsigned char
*destinationSlice = (unsigned char*)destination.buffer; 1871 1872 for(int z = 0; z < destination.depth && z < source.depth; z++) 1873 { 1874 unsigned char *sourceRow = sourceSlice; 1875 unsigned char *destinationRow = destinationSlice; 1876 1877 for(int y = 0; y < destination.height && y < source.height; y++) 1878 { 1879 unsigned char *sourceElement = sourceRow; 1880 unsigned char *destinationElement = destinationRow; 1881 1882 for(int x = 0; x < destination.width && x < source.width; x++) 1883 { 1884 unsigned int b = sourceElement[0]; 1885 unsigned int g = sourceElement[1]; 1886 unsigned int r = sourceElement[2]; 1887 1888 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0); 1889 1890 sourceElement += source.bytes; 1891 destinationElement += destination.bytes; 1892 } 1893 1894 sourceRow += source.pitchB; 1895 destinationRow += destination.pitchB; 1896 } 1897 1898 sourceSlice += source.sliceB; 1899 destinationSlice += destination.sliceB; 1900 } 1901 } 1902 1903 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source) 1904 { 1905 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1906 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1907 1908 for(int z = 0; z < destination.depth && z < source.depth; z++) 1909 { 1910 unsigned char *sourceRow = sourceSlice; 1911 unsigned char *destinationRow = destinationSlice; 1912 1913 for(int y = 0; y < destination.height && y < source.height; y++) 1914 { 1915 unsigned char *sourceElement = sourceRow; 1916 unsigned char *destinationElement = destinationRow; 1917 1918 for(int x = 0; x < destination.width && x < source.width; x++) 1919 { 1920 unsigned int xrgb = *(unsigned short*)sourceElement; 1921 1922 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1923 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1924 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8); 1925 1926 *(unsigned int*)destinationElement 
= 0xFF000000 | r | g | b; 1927 1928 sourceElement += source.bytes; 1929 destinationElement += destination.bytes; 1930 } 1931 1932 sourceRow += source.pitchB; 1933 destinationRow += destination.pitchB; 1934 } 1935 1936 sourceSlice += source.sliceB; 1937 destinationSlice += destination.sliceB; 1938 } 1939 } 1940 1941 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source) 1942 { 1943 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1944 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1945 1946 for(int z = 0; z < destination.depth && z < source.depth; z++) 1947 { 1948 unsigned char *sourceRow = sourceSlice; 1949 unsigned char *destinationRow = destinationSlice; 1950 1951 for(int y = 0; y < destination.height && y < source.height; y++) 1952 { 1953 unsigned char *sourceElement = sourceRow; 1954 unsigned char *destinationElement = destinationRow; 1955 1956 for(int x = 0; x < destination.width && x < source.width; x++) 1957 { 1958 unsigned int argb = *(unsigned short*)sourceElement; 1959 1960 unsigned int a = (argb & 0x8000) * 130560; 1961 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1962 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1963 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8); 1964 1965 *(unsigned int*)destinationElement = a | r | g | b; 1966 1967 sourceElement += source.bytes; 1968 destinationElement += destination.bytes; 1969 } 1970 1971 sourceRow += source.pitchB; 1972 destinationRow += destination.pitchB; 1973 } 1974 1975 sourceSlice += source.sliceB; 1976 destinationSlice += destination.sliceB; 1977 } 1978 } 1979 1980 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source) 1981 { 1982 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1983 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1984 1985 for(int z = 0; z < destination.depth && z < source.depth; z++) 1986 { 1987 unsigned char *sourceRow = 
sourceSlice;
            unsigned char *destinationRow = destinationSlice;

            for(int y = 0; y < destination.height && y < source.height; y++)
            {
                unsigned char *sourceElement = sourceRow;
                unsigned char *destinationElement = destinationRow;

                for(int x = 0; x < destination.width && x < source.width; x++)
                {
                    unsigned int xrgb = *(unsigned short*)sourceElement;

                    // Multiplying a 4-bit field by 0x11 replicates it into 8 bits
                    // (0xN -> 0xNN); the wider constants also move each replicated
                    // field into its byte lane of the output word.
                    unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
                    unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
                    unsigned int b = (xrgb & 0x000F) * 0x00000011;

                    *(unsigned int*)destinationElement = 0xFF000000 | r | g | b;

                    sourceElement += source.bytes;
                    destinationElement += destination.bytes;
                }

                sourceRow += source.pitchB;
                destinationRow += destination.pitchB;
            }

            sourceSlice += source.sliceB;
            destinationSlice += destination.sliceB;
        }
    }

    // Expands A4R4G4B4 source texels into 8-bit-per-channel output texels.
    // Same 0xN -> 0xNN replication as decodeX4R4G4B4, applied to all four channels.
    void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
    {
        unsigned char *sourceSlice = (unsigned char*)source.buffer;
        unsigned char *destinationSlice = (unsigned char*)destination.buffer;

        for(int z = 0; z < destination.depth && z < source.depth; z++)
        {
            unsigned char *sourceRow = sourceSlice;
            unsigned char *destinationRow = destinationSlice;

            for(int y = 0; y < destination.height && y < source.height; y++)
            {
                unsigned char *sourceElement = sourceRow;
                unsigned char *destinationElement = destinationRow;

                for(int x = 0; x < destination.width && x < source.width; x++)
                {
                    unsigned int argb = *(unsigned short*)sourceElement;

                    unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
                    unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
                    unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
                    unsigned int b = (argb & 0x000F) * 0x00000011;

                    *(unsigned int*)destinationElement = a | r | g | b;

                    sourceElement += source.bytes;
                    destinationElement += destination.bytes;
                }

                sourceRow += source.pitchB;
                destinationRow += destination.pitchB;
            }

            sourceSlice += source.sliceB;
            destinationSlice += destination.sliceB;
        }
    }

    // Expands P8 (8-bit palettized) source texels by looking each index up in the
    // class-static 'palette' table. The palette entries are stored as ABGR words,
    // so the R and B bytes are swapped while repacking into the output word.
    void Surface::decodeP8(Buffer &destination, const Buffer &source)
    {
        unsigned char *sourceSlice = (unsigned char*)source.buffer;
        unsigned char *destinationSlice = (unsigned char*)destination.buffer;

        for(int z = 0; z < destination.depth && z < source.depth; z++)
        {
            unsigned char *sourceRow = sourceSlice;
            unsigned char *destinationRow = destinationSlice;

            for(int y = 0; y < destination.height && y < source.height; y++)
            {
                unsigned char *sourceElement = sourceRow;
                unsigned char *destinationElement = destinationRow;

                for(int x = 0; x < destination.width && x < source.width; x++)
                {
                    unsigned int abgr = palette[*(unsigned char*)sourceElement];

                    // Swap R and B; keep G and A in place.
                    unsigned int r = (abgr & 0x000000FF) << 16;
                    unsigned int g = (abgr & 0x0000FF00) << 0;
                    unsigned int b = (abgr & 0x00FF0000) >> 16;
                    unsigned int a = (abgr & 0xFF000000) >> 0;

                    *(unsigned int*)destinationElement = a | r | g | b;

                    sourceElement += source.bytes;
                    destinationElement += destination.bytes;
                }

                sourceRow += source.pitchB;
                destinationRow += destination.pitchB;
            }

            sourceSlice += source.sliceB;
            destinationSlice += destination.sliceB;
        }
    }

#if S3TC_SUPPORT
    // Decodes DXT1 (BC1) 4x4 compressed blocks into 32-bit texels. Each block holds
    // two 16-bit endpoint colors plus a 2-bit-per-texel lookup table; the two derived
    // colors are interpolated, and the endpoint ordering selects between the opaque
    // and the 1-bit-transparency block mode.
    void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
    {
        unsigned int *destSlice = (unsigned int*)internal.buffer;
        const DXT1 *source = (const DXT1*)external.buffer;

        for(int z = 0; z < external.depth; z++)
        {
            unsigned int *dest = destSlice;

            for(int y = 0; y < external.height; y += 4)
            {
                for(int x = 0; x < external.width; x += 4)
                {
                    Color<byte>
c[4];

                    c[0] = source->c0;
                    c[1] = source->c1;

                    if(source->c0 > source->c1)   // No transparency
                    {
                        // c2 = 2 / 3 * c0 + 1 / 3 * c1
                        c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
                        c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
                        c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
                        c[2].a = 0xFF;

                        // c3 = 1 / 3 * c0 + 2 / 3 * c1
                        c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
                        c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
                        c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
                        c[3].a = 0xFF;
                    }
                    else   // c3 transparent
                    {
                        // c2 = 1 / 2 * c0 + 1 / 2 * c1
                        c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
                        c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
                        c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
                        c[2].a = 0xFF;

                        c[3].r = 0;
                        c[3].g = 0;
                        c[3].b = 0;
                        c[3].a = 0;
                    }

                    // Write the block's texels, clipped against the image edge for
                    // partial blocks. Two bits of 'lut' select the color per texel.
                    for(int j = 0; j < 4 && (y + j) < internal.height; j++)
                    {
                        for(int i = 0; i < 4 && (x + i) < internal.width; i++)
                        {
                            dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
                        }
                    }

                    source++;
                }
            }

            (byte*&)destSlice += internal.sliceB;   // advance one slice (pointer punned to bytes)
        }
    }

    // Decodes DXT3 (BC2) 4x4 compressed blocks: DXT1-style color endpoints with
    // explicit 4-bit alpha per texel (no alpha interpolation, no transparent mode).
    void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
    {
        unsigned int *destSlice = (unsigned int*)internal.buffer;
        const DXT3 *source = (const DXT3*)external.buffer;

        for(int z = 0; z < external.depth; z++)
        {
            unsigned int *dest = destSlice;

            for(int y = 0; y < external.height; y += 4)
            {
                for(int x = 0; x < external.width; x += 4)
                {
                    Color<byte> c[4];

                    c[0] = source->c0;
                    c[1] = source->c1;

                    // c2 = 2 / 3 * c0 + 1 / 3 * c1
                    c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
                    c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
                    c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);

                    // c3 = 1 / 3 * c0 + 2 / 3 * c1
                    c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
                    c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
                    c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);

                    for(int j = 0; j < 4 && (y + j) < internal.height; j++)
                    {
                        for(int i = 0; i < 4 && (x + i) < internal.width; i++)
                        {
                            // 4-bit alpha expanded to 8 bits: (a << 28) + (a << 24)
                            // replicates the nibble into both halves of the top byte.
                            unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
                            unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));

                            dest[(x + i) + (y + j) * internal.width] = color;
                        }
                    }

                    source++;
                }
            }

            (byte*&)destSlice += internal.sliceB;
        }
    }

    // Decodes DXT5 (BC3) 4x4 compressed blocks: DXT1-style color endpoints plus an
    // interpolated alpha ramp. Two 8-bit alpha endpoints generate either 8 blended
    // values (a0 > a1) or 6 blended values with explicit 0x00/0xFF extremes.
    void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
    {
        unsigned int *destSlice = (unsigned int*)internal.buffer;
        const DXT5 *source = (const DXT5*)external.buffer;

        for(int z = 0; z < external.depth; z++)
        {
            unsigned int *dest = destSlice;

            for(int y = 0; y < external.height; y += 4)
            {
                for(int x = 0; x < external.width; x += 4)
                {
                    Color<byte> c[4];

                    c[0] = source->c0;
                    c[1] = source->c1;

                    // c2 = 2 / 3 * c0 + 1 / 3 * c1
                    c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
                    c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
                    c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);

                    // c3 = 1 / 3 * c0 + 2 / 3 * c1
                    c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
                    c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
                    c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);

                    byte a[8];

                    a[0] = source->a0;
                    a[1] = source->a1;

                    if(a[0] > a[1])
                    {
                        // 8-value alpha ramp: six interpolants between the endpoints.
                        a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
                        a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
                        a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
                        a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
                        a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
                        a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
                    }
                    else
                    {
                        // 6-value ramp plus explicit fully-transparent and fully-opaque.
                        a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
                        a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
                        a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
                        a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
                        a[6] = 0;
                        a[7] = 0xFF;
                    }

                    for(int j = 0; j < 4 && (y + j) < internal.height; j++)
                    {
                        for(int i = 0; i < 4 && (x + i) < internal.width; i++)
                        {
                            // The >> (16 + ...) skips the two endpoint bytes packed at the
                            // bottom of 'alut'; 3 bits select one of the 8 alpha values.
                            unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
                            unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;

                            dest[(x + i) + (y + j) * internal.width] = color;
                        }
                    }

                    source++;
                }
            }

            (byte*&)destSlice += internal.sliceB;
        }
    }
#endif

    // Decodes ATI1 (BC4) 4x4 compressed blocks into single-channel 8-bit texels.
    // Uses the same two-endpoint 3-bit-index ramp scheme as DXT5 alpha.
    void Surface::decodeATI1(Buffer &internal, const Buffer &external)
    {
        byte *destSlice = (byte*)internal.buffer;
        const ATI1 *source = (const ATI1*)external.buffer;

        for(int z = 0; z < external.depth; z++)
        {
            byte *dest = destSlice;

            for(int y = 0; y < external.height; y += 4)
            {
                for(int x = 0; x < external.width; x += 4)
                {
                    byte r[8];

                    r[0] = source->r0;
                    r[1] = source->r1;

                    if(r[0] > r[1])
                    {
                        r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7);
                        r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7);
                        r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7);
                        r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7);
                        r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7);
                        r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7);
                    }
                    else
                    {
                        r[2] =
(byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5);
                        r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5);
                        r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5);
                        r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5);
                        r[6] = 0;
                        r[7] = 0xFF;
                    }

                    // Write the block's texels, clipped at the image edge; 3 bits of
                    // 'rlut' (past the two endpoint bytes) select the ramp value.
                    for(int j = 0; j < 4 && (y + j) < internal.height; j++)
                    {
                        for(int i = 0; i < 4 && (x + i) < internal.width; i++)
                        {
                            dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8];
                        }
                    }

                    source++;
                }
            }

            destSlice += internal.sliceB;
        }
    }

    // Decodes ATI2 (BC5) 4x4 compressed blocks into two-channel 16-bit texels.
    // Two independent BC4-style ramps (X and Y) are decoded and packed as
    // (Y << 8) | X per texel.
    void Surface::decodeATI2(Buffer &internal, const Buffer &external)
    {
        word *destSlice = (word*)internal.buffer;
        const ATI2 *source = (const ATI2*)external.buffer;

        for(int z = 0; z < external.depth; z++)
        {
            word *dest = destSlice;

            for(int y = 0; y < external.height; y += 4)
            {
                for(int x = 0; x < external.width; x += 4)
                {
                    byte X[8];

                    X[0] = source->x0;
                    X[1] = source->x1;

                    if(X[0] > X[1])
                    {
                        X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7);
                        X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7);
                        X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
                        X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
                        X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
                        X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
                    }
                    else
                    {
                        X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
                        X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
                        X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
                        X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
                        X[6] = 0;
                        X[7] = 0xFF;
                    }

                    byte Y[8];

                    Y[0] = source->y0;
                    Y[1] = source->y1;

                    if(Y[0] > Y[1])
                    {
                        Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
                        Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
                        Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
                        Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
                        Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
                        Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
                    }
                    else
                    {
                        Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
                        Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
                        Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
                        Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
                        Y[6] = 0;
                        Y[7] = 0xFF;
                    }

                    for(int j = 0; j < 4 && (y + j) < internal.height; j++)
                    {
                        for(int i = 0; i < 4 && (x + i) < internal.width; i++)
                        {
                            word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
                            word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];

                            dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
                        }
                    }

                    source++;
                }
            }

            (byte*&)destSlice += internal.sliceB;
        }
    }

    // Decodes ETC2 blocks via ETC_Decoder, selecting the variant from the number of
    // alpha bits (8 = RGBA, 1 = punchthrough alpha, otherwise opaque RGB). When
    // isSRGB is set, the decoded RGB bytes are converted to linear in place.
    void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
    {
        ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
                            (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ?
ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));

        if(isSRGB)
        {
            // Lazily-built 256-entry sRGB -> linear lookup table, shared across calls.
            static byte sRGBtoLinearTable[256];
            static bool sRGBtoLinearTableDirty = true;
            if(sRGBtoLinearTableDirty)
            {
                for(int i = 0; i < 256; i++)
                {
                    sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
                }
                sRGBtoLinearTableDirty = false;
            }

            // Perform sRGB conversion in place after decoding
            // NOTE(review): only the first 3 bytes per texel (RGB) are converted, and
            // only the first depth slice is walked — confirm depth > 1 is impossible here.
            byte* src = (byte*)internal.buffer;
            for(int y = 0; y < internal.height; y++)
            {
                byte* srcRow = src + y * internal.pitchB;
                for(int x = 0; x < internal.width; x++)
                {
                    byte* srcPix = srcRow + x * internal.bytes;
                    for(int i = 0; i < 3; i++)
                    {
                        srcPix[i] = sRGBtoLinearTable[srcPix[i]];
                    }
                }
            }
        }
    }

    // Decodes EAC one- or two-channel blocks via ETC_Decoder. Signed data is then
    // converted in place from 8-bit signed integers to normalized floats.
    void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
    {
        ASSERT(nbChannels == 1 || nbChannels == 2);

        ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
                            (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED));

        // FIXME: We convert signed data to float, until signed integer internal formats are supported
        // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats
        if(isSigned)
        {
            sbyte* src = (sbyte*)internal.buffer;

            for(int y = 0; y < internal.height; y++)
            {
                sbyte* srcRow = src + y * internal.pitchB;
                // Walk right-to-left so the wider float texels can be written over the
                // packed sbyte data without clobbering unread source bytes.
                for(int x = internal.width - 1; x >= 0; x--)
                {
                    int dx = x & 0xFFFFFFFC;
                    int mx = x - dx;
                    sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels;
                    float* dstPix = (float*)(srcRow + x * internal.bytes);
                    for(int c = nbChannels - 1; c >= 0; c--)
                    {
                        // 127.875 maps the signed byte range onto [-1, 1] (clamped).
                        static const float normalization = 1.0f / 127.875f;
                        dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f);
                    }
                }
            }
        }
    }

    // ASTC decoding is not implemented; intentionally a no-op stub.
    void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB)
    {
    }

    // Returns the allocation size in bytes for an image of the given dimensions and
    // format. For block-compressed formats the dimensions are rounded up to the
    // block granularity (may over-allocate for non-4x4 ASTC blocks; ASTC is a stub).
    unsigned int Surface::size(int width, int height, int depth, Format format)
    {
        // Dimensions rounded up to multiples of 4, used for compressed formats
        int width4 = align(width, 4);
        int height4 = align(height, 4);

        switch(format)
        {
        #if S3TC_SUPPORT
        case FORMAT_DXT1:
        #endif
        case FORMAT_ATI1:
        case FORMAT_ETC1:
        case FORMAT_R11_EAC:
        case FORMAT_SIGNED_R11_EAC:
        case FORMAT_RGB8_ETC2:
        case FORMAT_SRGB8_ETC2:
        case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
        case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
            return width4 * height4 * depth / 2;   // 8 bytes per 4x4 block
        #if S3TC_SUPPORT
        case FORMAT_DXT3:
        case FORMAT_DXT5:
        #endif
        case FORMAT_ATI2:
        case FORMAT_RG11_EAC:
        case FORMAT_SIGNED_RG11_EAC:
        case FORMAT_RGBA8_ETC2_EAC:
        case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
        case FORMAT_RGBA_ASTC_4x4_KHR:
        case
FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
            return width4 * height4 * depth;   // 16 bytes per 4x4 block
        case FORMAT_RGBA_ASTC_5x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
            return align(width, 5) * height4 * depth;
        case FORMAT_RGBA_ASTC_5x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
            return align(width, 5) * align(height, 5) * depth;
        case FORMAT_RGBA_ASTC_6x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
            return align(width, 6) * align(height, 5) * depth;
        case FORMAT_RGBA_ASTC_6x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
            return align(width, 6) * align(height, 6) * depth;
        case FORMAT_RGBA_ASTC_8x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
            return align(width, 8) * align(height, 5) * depth;
        case FORMAT_RGBA_ASTC_8x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
            return align(width, 8) * align(height, 6) * depth;
        case FORMAT_RGBA_ASTC_8x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
            return align(width, 8) * align(height, 8) * depth;
        case FORMAT_RGBA_ASTC_10x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
            return align(width, 10) * align(height, 5) * depth;
        case FORMAT_RGBA_ASTC_10x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
            return align(width, 10) * align(height, 6) * depth;
        case FORMAT_RGBA_ASTC_10x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
            return align(width, 10) * align(height, 8) * depth;
        case FORMAT_RGBA_ASTC_10x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
            return align(width, 10) * align(height, 10) * depth;
        case FORMAT_RGBA_ASTC_12x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
            return align(width, 12) * align(height, 10) * depth;
        case FORMAT_RGBA_ASTC_12x12_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
            return align(width, 12) * align(height, 12) * depth;
        case FORMAT_YV12_BT601:
        case FORMAT_YV12_BT709:
        case FORMAT_YV12_JFIF:
            {
                // Planar YV12: full-resolution Y plane followed by two half-resolution
                // chroma planes, each row aligned to 16 bytes.
                unsigned int YStride = align(width, 16);
                unsigned int YSize = YStride * height;
                unsigned int CStride = align(YStride / 2, 16);
                unsigned int CSize = CStride * height / 2;

                return YSize + 2 * CSize;
            }
        default:
            return bytes(format) * width * height * depth;
        }

        return 0;
    }

    // Returns true if the format carries a stencil component.
    bool Surface::isStencil(Format format)
    {
        switch(format)
        {
        case FORMAT_D32:
        case FORMAT_D16:
        case FORMAT_D24X8:
        case FORMAT_D32F:
        case FORMAT_D32F_COMPLEMENTARY:
        case FORMAT_D32F_LOCKABLE:
            return false;
        case FORMAT_D24S8:
        case FORMAT_D24FS8:
        case FORMAT_S8:
        case FORMAT_DF24S8:
        case FORMAT_DF16S8:
        case FORMAT_D32FS8_TEXTURE:
        case FORMAT_D32FS8_SHADOW:
        case FORMAT_INTZ:
            return true;
        default:
            return false;
        }
    }

    // Returns true if the format carries a depth component.
    bool Surface::isDepth(Format format)
    {
        switch(format)
        {
        case FORMAT_D32:
        case FORMAT_D16:
        case FORMAT_D24X8:
        case FORMAT_D24S8:
        case FORMAT_D24FS8:
        case FORMAT_D32F:
        case FORMAT_D32F_COMPLEMENTARY:
        case FORMAT_D32F_LOCKABLE:
        case FORMAT_DF24S8:
        case FORMAT_DF16S8:
        case FORMAT_D32FS8_TEXTURE:
        case FORMAT_D32FS8_SHADOW:
        case FORMAT_INTZ:
            return true;
        case FORMAT_S8:   // stencil only, no depth
            return false;
        default:
            return false;
        }
    }

    // Returns true for palettized formats.
    bool Surface::isPalette(Format format)
    {
        switch(format)
        {
        case FORMAT_P8:
        case FORMAT_A8P8:
            return true;
        default:
            return false;
        }
    }

    // Returns true if the format stores floating-point components.
    // Any format not explicitly listed triggers an assert in debug builds.
    bool Surface::isFloatFormat(Format format)
    {
        switch(format)
        {
        case FORMAT_R5G6B5:
        case FORMAT_X8R8G8B8:
        case FORMAT_X8B8G8R8I:
        case FORMAT_X8B8G8R8:
        case FORMAT_A8R8G8B8:
        case FORMAT_A8B8G8R8I:
        case FORMAT_R8UI:
        case FORMAT_G8R8UI:
        case FORMAT_X8B8G8R8UI:
        case FORMAT_A8B8G8R8UI:
        case FORMAT_A8B8G8R8:
        case FORMAT_G8R8I:
        case FORMAT_G8R8:
        case
FORMAT_R8I_SNORM:
        case FORMAT_G8R8I_SNORM:
        case FORMAT_X8B8G8R8I_SNORM:
        case FORMAT_A8B8G8R8I_SNORM:
        case FORMAT_R16I:
        case FORMAT_R16UI:
        case FORMAT_G16R16I:
        case FORMAT_G16R16UI:
        case FORMAT_G16R16:
        case FORMAT_X16B16G16R16I:
        case FORMAT_X16B16G16R16UI:
        case FORMAT_A16B16G16R16I:
        case FORMAT_A16B16G16R16UI:
        case FORMAT_A16B16G16R16:
        case FORMAT_V8U8:
        case FORMAT_Q8W8V8U8:
        case FORMAT_X8L8V8U8:
        case FORMAT_V16U16:
        case FORMAT_A16W16V16U16:
        case FORMAT_Q16W16V16U16:
        case FORMAT_A8:
        case FORMAT_R8I:
        case FORMAT_R8:
        case FORMAT_L8:
        case FORMAT_L16:
        case FORMAT_A8L8:
        case FORMAT_YV12_BT601:
        case FORMAT_YV12_BT709:
        case FORMAT_YV12_JFIF:
        case FORMAT_R32I:
        case FORMAT_R32UI:
        case FORMAT_G32R32I:
        case FORMAT_G32R32UI:
        case FORMAT_X32B32G32R32I:
        case FORMAT_X32B32G32R32UI:
        case FORMAT_A32B32G32R32I:
        case FORMAT_A32B32G32R32UI:
            return false;
        case FORMAT_R32F:
        case FORMAT_G32R32F:
        case FORMAT_A32B32G32R32F:
        case FORMAT_D32F:
        case FORMAT_D32F_COMPLEMENTARY:
        case FORMAT_D32F_LOCKABLE:
        case FORMAT_D32FS8_TEXTURE:
        case FORMAT_D32FS8_SHADOW:
        case FORMAT_L16F:
        case FORMAT_A16L16F:
        case FORMAT_L32F:
        case FORMAT_A32L32F:
            return true;
        default:
            ASSERT(false);
        }

        return false;
    }

    // Returns true if the given component index of the format is unsigned.
    // Components a format does not store (e.g. the X/W slots) count as unsigned,
    // hence the 'component >= N' cases for partially-signed formats.
    bool Surface::isUnsignedComponent(Format format, int component)
    {
        switch(format)
        {
        case FORMAT_NULL:
        case FORMAT_R5G6B5:
        case FORMAT_X8R8G8B8:
        case FORMAT_X8B8G8R8:
        case FORMAT_A8R8G8B8:
        case FORMAT_A8B8G8R8:
        case FORMAT_G8R8:
        case FORMAT_R16UI:
        case FORMAT_G16R16:
        case FORMAT_G16R16UI:
        case FORMAT_X16B16G16R16UI:
        case FORMAT_A16B16G16R16:
        case FORMAT_A16B16G16R16UI:
        case FORMAT_R32UI:
        case FORMAT_G32R32UI:
        case FORMAT_X32B32G32R32UI:
        case FORMAT_A32B32G32R32UI:
        case FORMAT_R8UI:
        case FORMAT_G8R8UI:
        case FORMAT_X8B8G8R8UI:
        case FORMAT_A8B8G8R8UI:
        case FORMAT_D32F:
        case FORMAT_D32F_COMPLEMENTARY:
        case FORMAT_D32F_LOCKABLE:
        case FORMAT_D32FS8_TEXTURE:
        case FORMAT_D32FS8_SHADOW:
        case FORMAT_A8:
        case FORMAT_R8:
        case FORMAT_L8:
        case FORMAT_L16:
        case FORMAT_A8L8:
        case FORMAT_YV12_BT601:
        case FORMAT_YV12_BT709:
        case FORMAT_YV12_JFIF:
            return true;
        case FORMAT_A8B8G8R8I:
        case FORMAT_A16B16G16R16I:
        case FORMAT_A32B32G32R32I:
        case FORMAT_A8B8G8R8I_SNORM:
        case FORMAT_Q8W8V8U8:
        case FORMAT_Q16W16V16U16:
        case FORMAT_A32B32G32R32F:
            return false;
        case FORMAT_R32F:
        case FORMAT_R8I:
        case FORMAT_R16I:
        case FORMAT_R32I:
        case FORMAT_R8I_SNORM:
            return component >= 1;   // only the single stored component is signed
        case FORMAT_V8U8:
        case FORMAT_X8L8V8U8:
        case FORMAT_V16U16:
        case FORMAT_G32R32F:
        case FORMAT_G8R8I:
        case FORMAT_G16R16I:
        case FORMAT_G32R32I:
        case FORMAT_G8R8I_SNORM:
            return component >= 2;   // first two components signed
        case FORMAT_A16W16V16U16:
        case FORMAT_X8B8G8R8I:
        case FORMAT_X16B16G16R16I:
        case FORMAT_X32B32G32R32I:
        case FORMAT_X8B8G8R8I_SNORM:
            return component >= 3;   // first three components signed
        default:
            ASSERT(false);
        }

        return false;
    }

    // Returns true if the format can be sampled with sRGB-to-linear conversion.
    bool Surface::isSRGBreadable(Format format)
    {
        // Keep in sync with Capabilities::isSRGBreadable
        switch(format)
        {
        case FORMAT_L8:
        case FORMAT_A8L8:
        case FORMAT_R8G8B8:
        case FORMAT_A8R8G8B8:
        case FORMAT_X8R8G8B8:
        case FORMAT_A8B8G8R8:
        case FORMAT_X8B8G8R8:
        case FORMAT_R5G6B5:
        case FORMAT_X1R5G5B5:
        case FORMAT_A1R5G5B5:
        case FORMAT_A4R4G4B4:
        #if S3TC_SUPPORT
        case FORMAT_DXT1:
        case FORMAT_DXT3:
        case FORMAT_DXT5:
        #endif
        case FORMAT_ATI1:
        case FORMAT_ATI2:
            return true;
default:
            return false;
        }

        return false;
    }

    // Returns true if the format can be rendered to with linear-to-sRGB conversion.
    bool Surface::isSRGBwritable(Format format)
    {
        // Keep in sync with Capabilities::isSRGBwritable
        switch(format)
        {
        case FORMAT_NULL:
        case FORMAT_A8R8G8B8:
        case FORMAT_X8R8G8B8:
        case FORMAT_A8B8G8R8:
        case FORMAT_X8B8G8R8:
        case FORMAT_R5G6B5:
            return true;
        default:
            return false;
        }
    }

    // Returns true for block-compressed formats (S3TC/DXT, ATI1/2, ETC, EAC, ASTC).
    bool Surface::isCompressed(Format format)
    {
        switch(format)
        {
        #if S3TC_SUPPORT
        case FORMAT_DXT1:
        case FORMAT_DXT3:
        case FORMAT_DXT5:
        #endif
        case FORMAT_ATI1:
        case FORMAT_ATI2:
        case FORMAT_ETC1:
        case FORMAT_R11_EAC:
        case FORMAT_SIGNED_R11_EAC:
        case FORMAT_RG11_EAC:
        case FORMAT_SIGNED_RG11_EAC:
        case FORMAT_RGB8_ETC2:
        case FORMAT_SRGB8_ETC2:
        case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
        case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
        case FORMAT_RGBA8_ETC2_EAC:
        case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
        case FORMAT_RGBA_ASTC_4x4_KHR:
        case FORMAT_RGBA_ASTC_5x4_KHR:
        case FORMAT_RGBA_ASTC_5x5_KHR:
        case FORMAT_RGBA_ASTC_6x5_KHR:
        case FORMAT_RGBA_ASTC_6x6_KHR:
        case FORMAT_RGBA_ASTC_8x5_KHR:
        case FORMAT_RGBA_ASTC_8x6_KHR:
        case FORMAT_RGBA_ASTC_8x8_KHR:
        case FORMAT_RGBA_ASTC_10x5_KHR:
        case FORMAT_RGBA_ASTC_10x6_KHR:
        case FORMAT_RGBA_ASTC_10x8_KHR:
        case FORMAT_RGBA_ASTC_10x10_KHR:
        case FORMAT_RGBA_ASTC_12x10_KHR:
        case FORMAT_RGBA_ASTC_12x12_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
        case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
            return true;
        default:
            return false;
        }
    }

    // Returns true for non-normalized (pure integer) formats.
    bool Surface::isNonNormalizedInteger(Format format)
    {
        switch(format)
        {
        case FORMAT_A8B8G8R8I:
        case FORMAT_X8B8G8R8I:
        case FORMAT_G8R8I:
        case FORMAT_R8I:
        case FORMAT_A8B8G8R8UI:
        case FORMAT_X8B8G8R8UI:
        case FORMAT_G8R8UI:
        case FORMAT_R8UI:
        case FORMAT_A16B16G16R16I:
        case FORMAT_X16B16G16R16I:
        case FORMAT_G16R16I:
        case FORMAT_R16I:
        case FORMAT_A16B16G16R16UI:
        case FORMAT_X16B16G16R16UI:
        case FORMAT_G16R16UI:
        case FORMAT_R16UI:
        case FORMAT_A32B32G32R32I:
        case FORMAT_X32B32G32R32I:
        case FORMAT_G32R32I:
        case FORMAT_R32I:
        case FORMAT_A32B32G32R32UI:
        case FORMAT_X32B32G32R32UI:
        case FORMAT_G32R32UI:
        case FORMAT_R32UI:
            return true;
        default:
            return false;
        }
    }

    // Returns the number of color components stored by the format.
    // Unlisted formats assert in debug builds and report 1.
    int Surface::componentCount(Format format)
    {
        switch(format)
        {
        case FORMAT_R5G6B5:         return 3;
        case FORMAT_X8R8G8B8:       return 3;
        case FORMAT_X8B8G8R8I:      return 3;
        case FORMAT_X8B8G8R8:       return 3;
        case FORMAT_A8R8G8B8:       return 4;
        case FORMAT_A8B8G8R8I:      return 4;
        case FORMAT_A8B8G8R8:       return 4;
        case FORMAT_G8R8I:          return 2;
        case FORMAT_G8R8:           return 2;
        case FORMAT_R8I_SNORM:      return 1;
        case FORMAT_G8R8I_SNORM:    return 2;
        case FORMAT_X8B8G8R8I_SNORM:return 3;
        case FORMAT_A8B8G8R8I_SNORM:return 4;
        case FORMAT_R8UI:           return 1;
        case FORMAT_G8R8UI:         return 2;
        case FORMAT_X8B8G8R8UI:     return 3;
        case FORMAT_A8B8G8R8UI:     return 4;
        case FORMAT_G16R16I:        return 2;
        case FORMAT_G16R16UI:       return 2;
        case FORMAT_G16R16:         return 2;
        case FORMAT_G32R32I:        return 2;
        case FORMAT_G32R32UI:       return 2;
        case FORMAT_X16B16G16R16I:  return 3;
        case FORMAT_X16B16G16R16UI: return 3;
        case FORMAT_A16B16G16R16I:  return 4;
        case FORMAT_A16B16G16R16UI: return 4;
        case FORMAT_A16B16G16R16:   return 4;
        case FORMAT_X32B32G32R32I:  return 3;
        case FORMAT_X32B32G32R32UI: return 3;
        case FORMAT_A32B32G32R32I:  return 4;
        case FORMAT_A32B32G32R32UI: return 4;
        case FORMAT_V8U8:           return 2;
        case FORMAT_Q8W8V8U8:       return 4;
        case FORMAT_X8L8V8U8:       return 3;
        case FORMAT_V16U16:         return 2;
        case FORMAT_A16W16V16U16:   return 4;
        case FORMAT_Q16W16V16U16:   return 4;
        case FORMAT_R32F:           return 1;
        case FORMAT_G32R32F:        return 2;
        case FORMAT_A32B32G32R32F:  return 4;
        case FORMAT_D32F:           return 1;
        case FORMAT_D32F_LOCKABLE:  return 1;
        case FORMAT_D32FS8_TEXTURE: return 1;
        case FORMAT_D32FS8_SHADOW:  return 1;
        case FORMAT_A8:             return 1;
        case FORMAT_R8I:            return 1;
        case FORMAT_R8:             return 1;
        case FORMAT_R16I:           return 1;
        case FORMAT_R16UI:          return 1;
        case FORMAT_R32I:           return 1;
        case FORMAT_R32UI:          return 1;
        case FORMAT_L8:             return 1;
        case FORMAT_L16:            return 1;
        case FORMAT_A8L8:           return 2;
        case FORMAT_YV12_BT601:     return 3;
        case FORMAT_YV12_BT709:     return 3;
        case FORMAT_YV12_JFIF:      return 3;
        default:
            ASSERT(false);
        }

        return 1;
    }

    // Allocates a zero-initialized pixel buffer, padding dimensions to even values
    // and over-allocating slightly for sampler over-reads.
    void *Surface::allocateBuffer(int width, int height, int depth, Format format)
    {
        // Render targets require 2x2 quads
        int width2 = (width + 1) & ~1;
        int height2 = (height + 1) & ~1;

        // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
        // so we have to allocate 4 extra bytes to avoid buffer overruns.
return allocateZero(size(width2, height2, depth, format) + 4);
    }

    // Fills 'bytes' bytes of 'buffer' with the 32-bit 'pattern', replicated.
    // Aligns the pointer in steps (byte, word, dword), then uses SSE streaming
    // stores for 64-byte groups when available, and finishes with scalar tails.
    void Surface::memfill4(void *buffer, int pattern, int bytes)
    {
        // Align to a 2-byte boundary.
        while((size_t)buffer & 0x1 && bytes >= 1)
        {
            *(char*)buffer = (char)pattern;
            (char*&)buffer += 1;
            bytes -= 1;
        }

        // Align to a 4-byte boundary.
        while((size_t)buffer & 0x3 && bytes >= 2)
        {
            *(short*)buffer = (short)pattern;
            (short*&)buffer += 1;
            bytes -= 2;
        }

        if(CPUID::supportsSSE())
        {
            // Align to a 16-byte boundary for the streaming stores below.
            while((size_t)buffer & 0xF && bytes >= 4)
            {
                *(int*)buffer = pattern;
                (int*&)buffer += 1;
                bytes -= 4;
            }

            // Pattern broadcast into all four lanes (bit-punned, not a float convert).
            __m128 quad = _mm_set_ps1((float&)pattern);

            float *pointer = (float*)buffer;
            int qxwords = bytes / 64;
            bytes -= qxwords * 64;

            // Non-temporal stores, 64 bytes per iteration, bypassing the cache.
            while(qxwords--)
            {
                _mm_stream_ps(pointer + 0, quad);
                _mm_stream_ps(pointer + 4, quad);
                _mm_stream_ps(pointer + 8, quad);
                _mm_stream_ps(pointer + 12, quad);

                pointer += 16;
            }

            buffer = pointer;
        }

        // Scalar tails for the remaining bytes.
        while(bytes >= 4)
        {
            *(int*)buffer = (int)pattern;
            (int*&)buffer += 1;
            bytes -= 4;
        }

        while(bytes >= 2)
        {
            *(short*)buffer = (short)pattern;
            (short*&)buffer += 1;
            bytes -= 2;
        }

        while(bytes >= 1)
        {
            *(char*)buffer = (char)pattern;
            (char*&)buffer += 1;
            bytes -= 1;
        }
    }

    // Returns true if 'rect' covers the entire (single-slice) internal surface.
    bool Surface::isEntire(const SliceRect& rect) const
    {
        return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1);
    }

    // Clips the requested clear region against the internal surface dimensions.
    // Returns false when the region does not overlap the surface at all;
    // otherwise writes the clipped region to 'rect' and returns true.
    bool Surface::getClearRect(int x0, int y0, int width, int height, SliceRect& rect) const
    {
        // Not overlapping
        if(x0 > internal.width) return false;
        if(y0 > internal.height) return false;
        if(x0 + width < 0) return false;
        if(y0 + height < 0) return false;

        // Clip against dimensions
        if(x0 < 0) { width += x0; x0 = 0; }
        if(x0 + width > internal.width) width = internal.width - x0;
        if(y0 < 0) { height += y0; y0 = 0; }
        if(y0 + height > internal.height) height = internal.height - y0;

        rect.x0 = x0;
        rect.x1 = x0 + width;
        rect.y0 = y0;
        rect.y1 = y0 + height;

        return true;
    }

    // Clears the (possibly quad-layout) internal depth buffer to 'depth' over the
    // given region, clipping against the surface and honoring the complementary-Z
    // convention for non-lockable formats.
    void Surface::clearDepthBuffer(float depth, int x0, int y0, int width, int height)
    {
        // Not overlapping
        if(x0 > internal.width) return;
        if(y0 > internal.height) return;
        if(x0 + width < 0) return;
        if(y0 + height < 0) return;

        // Clip against dimensions
        if(x0 < 0) {width += x0; x0 = 0;}
        if(x0 + width > internal.width) width = internal.width - x0;
        if(y0 < 0) {height += y0; y0 = 0;}
        if(y0 + height > internal.height) height = internal.height - y0;

        // A full-surface clear may discard the previous contents entirely.
        const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height;
        const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY;

        int width2 = (internal.width + 1) & ~1;

        int x1 = x0 + width;
        int y1 = y0 + height;

        if(internal.format == FORMAT_D32F_LOCKABLE ||
           internal.format == FORMAT_D32FS8_TEXTURE ||
           internal.format == FORMAT_D32FS8_SHADOW)
        {
            // Linear layout: fill row by row with the raw float bit pattern.
            float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0;

            for(int z = 0; z < internal.depth; z++)
            {
                for(int y = y0; y < y1; y++)
                {
                    memfill4(target, (int&)depth, 4 * width);
                    target += width2;
                }
            }

            unlockInternal();
        }
        else   // Quad layout
        {
            if(complementaryDepthBuffer)
            {
                depth = 1 - depth;
            }

            float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC);

            for(int z = 0; z < internal.depth; z++)
            {
                for(int y = y0; y < y1; y++)
                {
                    float *target = buffer + (y & ~1) * width2 + (y & 1) * 2;

                    if((y & 1) == 0 && y + 1 < y1)   // Fill quad line at once
                    {
                        if((x0 & 1) != 0)
                        {
                            target[(x0 & ~1)
* 2 + 1] = depth; 3152 target[(x0 & ~1) * 2 + 3] = depth; 3153 } 3154 3155 // for(int x2 = ((x0 + 1) & ~1) * 2; x2 < x1 * 2; x2 += 4) 3156 // { 3157 // target[x2 + 0] = depth; 3158 // target[x2 + 1] = depth; 3159 // target[x2 + 2] = depth; 3160 // target[x2 + 3] = depth; 3161 // } 3162 3163 // __asm 3164 // { 3165 // movss xmm0, depth 3166 // shufps xmm0, xmm0, 0x00 3167 // 3168 // mov eax, x0 3169 // add eax, 1 3170 // and eax, 0xFFFFFFFE 3171 // cmp eax, x1 3172 // jge qEnd 3173 // 3174 // mov edi, target 3175 // 3176 // qLoop: 3177 // movntps [edi+8*eax], xmm0 3178 // 3179 // add eax, 2 3180 // cmp eax, x1 3181 // jl qLoop 3182 // qEnd: 3183 // } 3184 3185 memfill4(&target[((x0 + 1) & ~1) * 2], (int&)depth, 8 * ((x1 & ~1) - ((x0 + 1) & ~1))); 3186 3187 if((x1 & 1) != 0) 3188 { 3189 target[(x1 & ~1) * 2 + 0] = depth; 3190 target[(x1 & ~1) * 2 + 2] = depth; 3191 } 3192 3193 y++; 3194 } 3195 else 3196 { 3197 for(int x = x0; x < x1; x++) 3198 { 3199 target[(x & ~1) * 2 + (x & 1)] = depth; 3200 } 3201 } 3202 } 3203 3204 buffer += internal.sliceP; 3205 } 3206 3207 unlockInternal(); 3208 } 3209 } 3210 3211 void Surface::clearStencilBuffer(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3212 { 3213 // Not overlapping 3214 if(x0 > internal.width) return; 3215 if(y0 > internal.height) return; 3216 if(x0 + width < 0) return; 3217 if(y0 + height < 0) return; 3218 3219 // Clip against dimensions 3220 if(x0 < 0) {width += x0; x0 = 0;} 3221 if(x0 + width > internal.width) width = internal.width - x0; 3222 if(y0 < 0) {height += y0; y0 = 0;} 3223 if(y0 + height > internal.height) height = internal.height - y0; 3224 3225 int width2 = (internal.width + 1) & ~1; 3226 3227 int x1 = x0 + width; 3228 int y1 = y0 + height; 3229 3230 unsigned char maskedS = s & mask; 3231 unsigned char invMask = ~mask; 3232 unsigned int fill = maskedS; 3233 fill = fill | (fill << 8) | (fill << 16) + (fill << 24); 3234 3235 if(false) 3236 { 3237 char *target = 
(char*)lockStencil(0, PUBLIC) + x0 + width2 * y0; 3238 3239 for(int z = 0; z < stencil.depth; z++) 3240 { 3241 for(int y = y0; y < y0 + height; y++) 3242 { 3243 if(mask == 0xFF) 3244 { 3245 memfill4(target, fill, width); 3246 } 3247 else 3248 { 3249 for(int x = 0; x < width; x++) 3250 { 3251 target[x] = maskedS | (target[x] & invMask); 3252 } 3253 } 3254 3255 target += width2; 3256 } 3257 } 3258 3259 unlockStencil(); 3260 } 3261 else // Quad layout 3262 { 3263 char *buffer = (char*)lockStencil(0, PUBLIC); 3264 3265 if(mask == 0xFF) 3266 { 3267 for(int z = 0; z < stencil.depth; z++) 3268 { 3269 for(int y = y0; y < y1; y++) 3270 { 3271 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3272 3273 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3274 { 3275 if((x0 & 1) != 0) 3276 { 3277 target[(x0 & ~1) * 2 + 1] = fill; 3278 target[(x0 & ~1) * 2 + 3] = fill; 3279 } 3280 3281 memfill4(&target[((x0 + 1) & ~1) * 2], fill, ((x1 + 1) & ~1) * 2 - ((x0 + 1) & ~1) * 2); 3282 3283 if((x1 & 1) != 0) 3284 { 3285 target[(x1 & ~1) * 2 + 0] = fill; 3286 target[(x1 & ~1) * 2 + 2] = fill; 3287 } 3288 3289 y++; 3290 } 3291 else 3292 { 3293 for(int x = x0; x < x1; x++) 3294 { 3295 target[(x & ~1) * 2 + (x & 1)] = maskedS | (target[x] & invMask); 3296 } 3297 } 3298 } 3299 3300 buffer += stencil.sliceP; 3301 } 3302 } 3303 3304 unlockStencil(); 3305 } 3306 } 3307 3308 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3309 { 3310 unsigned char *row; 3311 Buffer *buffer; 3312 3313 if(internal.dirty) 3314 { 3315 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3316 buffer = &internal; 3317 } 3318 else 3319 { 3320 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3321 buffer = &external; 3322 } 3323 3324 if(buffer->bytes <= 4) 3325 { 3326 int c; 3327 buffer->write(&c, color); 3328 3329 if(buffer->bytes <= 1) c = (c << 8) | c; 3330 if(buffer->bytes <= 2) c = (c << 16) | c; 3331 3332 
for(int y = 0; y < height; y++) 3333 { 3334 memfill4(row, c, width * buffer->bytes); 3335 3336 row += buffer->pitchB; 3337 } 3338 } 3339 else // Generic 3340 { 3341 for(int y = 0; y < height; y++) 3342 { 3343 unsigned char *element = row; 3344 3345 for(int x = 0; x < width; x++) 3346 { 3347 buffer->write(element, color); 3348 3349 element += buffer->bytes; 3350 } 3351 3352 row += buffer->pitchB; 3353 } 3354 } 3355 3356 if(buffer == &internal) 3357 { 3358 unlockInternal(); 3359 } 3360 else 3361 { 3362 unlockExternal(); 3363 } 3364 } 3365 3366 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter) 3367 { 3368 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3369 3370 sw::Color<float> color; 3371 3372 if(!filter) 3373 { 3374 color = source->internal.read((int)srcX, (int)srcY); 3375 } 3376 else // Bilinear filtering 3377 { 3378 color = source->internal.sample(srcX, srcY); 3379 } 3380 3381 internal.write(x, y, color); 3382 } 3383 3384 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3385 { 3386 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3387 3388 sw::Color<float> color; 3389 3390 if(!filter) 3391 { 3392 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3393 } 3394 else // Bilinear filtering 3395 { 3396 color = source->internal.sample(srcX, srcY, srcZ); 3397 } 3398 3399 internal.write(x, y, z, color); 3400 } 3401 3402 bool Surface::hasStencil() const 3403 { 3404 return isStencil(external.format); 3405 } 3406 3407 bool Surface::hasDepth() const 3408 { 3409 return isDepth(external.format); 3410 } 3411 3412 bool Surface::hasPalette() const 3413 { 3414 return isPalette(external.format); 3415 } 3416 3417 bool Surface::isRenderTarget() const 3418 { 3419 return renderTarget; 3420 } 3421 3422 bool Surface::hasDirtyMipmaps() const 3423 { 3424 return 
dirtyMipmaps; 3425 } 3426 3427 void Surface::cleanMipmaps() 3428 { 3429 dirtyMipmaps = false; 3430 } 3431 3432 Resource *Surface::getResource() 3433 { 3434 return resource; 3435 } 3436 3437 bool Surface::identicalFormats() const 3438 { 3439 return external.format == internal.format && 3440 external.width == internal.width && 3441 external.height == internal.height && 3442 external.depth == internal.depth && 3443 external.pitchB == internal.pitchB && 3444 external.sliceB == internal.sliceB; 3445 } 3446 3447 Format Surface::selectInternalFormat(Format format) const 3448 { 3449 switch(format) 3450 { 3451 case FORMAT_NULL: 3452 return FORMAT_NULL; 3453 case FORMAT_P8: 3454 case FORMAT_A8P8: 3455 case FORMAT_A4R4G4B4: 3456 case FORMAT_A1R5G5B5: 3457 case FORMAT_A8R3G3B2: 3458 return FORMAT_A8R8G8B8; 3459 case FORMAT_A8: 3460 return FORMAT_A8; 3461 case FORMAT_R8I: 3462 return FORMAT_R8I; 3463 case FORMAT_R8UI: 3464 return FORMAT_R8UI; 3465 case FORMAT_R8I_SNORM: 3466 return FORMAT_R8I_SNORM; 3467 case FORMAT_R8: 3468 return FORMAT_R8; 3469 case FORMAT_R16I: 3470 return FORMAT_R16I; 3471 case FORMAT_R16UI: 3472 return FORMAT_R16UI; 3473 case FORMAT_R32I: 3474 return FORMAT_R32I; 3475 case FORMAT_R32UI: 3476 return FORMAT_R32UI; 3477 case FORMAT_A2R10G10B10: 3478 case FORMAT_A2B10G10R10: 3479 case FORMAT_X16B16G16R16I: 3480 case FORMAT_A16B16G16R16I: 3481 return FORMAT_A16B16G16R16I; 3482 case FORMAT_X16B16G16R16UI: 3483 case FORMAT_A16B16G16R16UI: 3484 return FORMAT_A16B16G16R16UI; 3485 case FORMAT_A16B16G16R16: 3486 return FORMAT_A16B16G16R16; 3487 case FORMAT_X32B32G32R32I: 3488 case FORMAT_A32B32G32R32I: 3489 return FORMAT_A32B32G32R32I; 3490 case FORMAT_X32B32G32R32UI: 3491 case FORMAT_A32B32G32R32UI: 3492 return FORMAT_A32B32G32R32UI; 3493 case FORMAT_G8R8I: 3494 return FORMAT_G8R8I; 3495 case FORMAT_G8R8UI: 3496 return FORMAT_G8R8UI; 3497 case FORMAT_G8R8I_SNORM: 3498 return FORMAT_G8R8I_SNORM; 3499 case FORMAT_G8R8: 3500 return FORMAT_G8R8; 3501 case 
FORMAT_G16R16I: 3502 return FORMAT_G16R16I; 3503 case FORMAT_G16R16UI: 3504 return FORMAT_G16R16UI; 3505 case FORMAT_G16R16: 3506 return FORMAT_G16R16; 3507 case FORMAT_G32R32I: 3508 return FORMAT_G32R32I; 3509 case FORMAT_G32R32UI: 3510 return FORMAT_G32R32UI; 3511 case FORMAT_A8R8G8B8: 3512 if(lockable || !quadLayoutEnabled) 3513 { 3514 return FORMAT_A8R8G8B8; 3515 } 3516 else 3517 { 3518 return FORMAT_A8G8R8B8Q; 3519 } 3520 case FORMAT_A8B8G8R8I: 3521 return FORMAT_A8B8G8R8I; 3522 case FORMAT_A8B8G8R8UI: 3523 return FORMAT_A8B8G8R8UI; 3524 case FORMAT_A8B8G8R8I_SNORM: 3525 return FORMAT_A8B8G8R8I_SNORM; 3526 case FORMAT_R5G5B5A1: 3527 case FORMAT_R4G4B4A4: 3528 case FORMAT_A8B8G8R8: 3529 return FORMAT_A8B8G8R8; 3530 case FORMAT_R5G6B5: 3531 return FORMAT_R5G6B5; 3532 case FORMAT_R3G3B2: 3533 case FORMAT_R8G8B8: 3534 case FORMAT_X4R4G4B4: 3535 case FORMAT_X1R5G5B5: 3536 case FORMAT_X8R8G8B8: 3537 if(lockable || !quadLayoutEnabled) 3538 { 3539 return FORMAT_X8R8G8B8; 3540 } 3541 else 3542 { 3543 return FORMAT_X8G8R8B8Q; 3544 } 3545 case FORMAT_X8B8G8R8I: 3546 return FORMAT_X8B8G8R8I; 3547 case FORMAT_X8B8G8R8UI: 3548 return FORMAT_X8B8G8R8UI; 3549 case FORMAT_X8B8G8R8I_SNORM: 3550 return FORMAT_X8B8G8R8I_SNORM; 3551 case FORMAT_B8G8R8: 3552 case FORMAT_X8B8G8R8: 3553 return FORMAT_X8B8G8R8; 3554 // Compressed formats 3555 #if S3TC_SUPPORT 3556 case FORMAT_DXT1: 3557 case FORMAT_DXT3: 3558 case FORMAT_DXT5: 3559 #endif 3560 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3561 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3562 case FORMAT_RGBA8_ETC2_EAC: 3563 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3564 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3565 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3566 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3567 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3568 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3569 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3570 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3571 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3572 case 
FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3573 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3574 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3575 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3576 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3577 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3578 return FORMAT_A8R8G8B8; 3579 case FORMAT_RGBA_ASTC_4x4_KHR: 3580 case FORMAT_RGBA_ASTC_5x4_KHR: 3581 case FORMAT_RGBA_ASTC_5x5_KHR: 3582 case FORMAT_RGBA_ASTC_6x5_KHR: 3583 case FORMAT_RGBA_ASTC_6x6_KHR: 3584 case FORMAT_RGBA_ASTC_8x5_KHR: 3585 case FORMAT_RGBA_ASTC_8x6_KHR: 3586 case FORMAT_RGBA_ASTC_8x8_KHR: 3587 case FORMAT_RGBA_ASTC_10x5_KHR: 3588 case FORMAT_RGBA_ASTC_10x6_KHR: 3589 case FORMAT_RGBA_ASTC_10x8_KHR: 3590 case FORMAT_RGBA_ASTC_10x10_KHR: 3591 case FORMAT_RGBA_ASTC_12x10_KHR: 3592 case FORMAT_RGBA_ASTC_12x12_KHR: 3593 // ASTC supports HDR, so a floating point format is required to represent it properly 3594 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported 3595 case FORMAT_ATI1: 3596 case FORMAT_R11_EAC: 3597 return FORMAT_R8; 3598 case FORMAT_SIGNED_R11_EAC: 3599 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3600 case FORMAT_ATI2: 3601 case FORMAT_RG11_EAC: 3602 return FORMAT_G8R8; 3603 case FORMAT_SIGNED_RG11_EAC: 3604 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3605 case FORMAT_ETC1: 3606 case FORMAT_RGB8_ETC2: 3607 case FORMAT_SRGB8_ETC2: 3608 return FORMAT_X8R8G8B8; 3609 // Bumpmap formats 3610 case FORMAT_V8U8: return FORMAT_V8U8; 3611 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3612 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3613 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3614 case FORMAT_V16U16: return FORMAT_V16U16; 3615 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3616 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3617 // Floating-point formats 3618 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3619 case FORMAT_R16F: return FORMAT_R32F; 3620 case 
FORMAT_G16R16F: return FORMAT_G32R32F; 3621 case FORMAT_B16G16R16F: return FORMAT_A32B32G32R32F; 3622 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3623 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3624 case FORMAT_R32F: return FORMAT_R32F; 3625 case FORMAT_G32R32F: return FORMAT_G32R32F; 3626 case FORMAT_B32G32R32F: return FORMAT_A32B32G32R32F; 3627 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3628 // Luminance formats 3629 case FORMAT_L8: return FORMAT_L8; 3630 case FORMAT_A4L4: return FORMAT_A8L8; 3631 case FORMAT_L16: return FORMAT_L16; 3632 case FORMAT_A8L8: return FORMAT_A8L8; 3633 case FORMAT_L16F: return FORMAT_A32B32G32R32F; 3634 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3635 case FORMAT_L32F: return FORMAT_A32B32G32R32F; 3636 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3637 // Depth/stencil formats 3638 case FORMAT_D16: 3639 case FORMAT_D32: 3640 case FORMAT_D24X8: 3641 case FORMAT_D24S8: 3642 case FORMAT_D24FS8: 3643 if(hasParent) // Texture 3644 { 3645 return FORMAT_D32FS8_SHADOW; 3646 } 3647 else if(complementaryDepthBuffer) 3648 { 3649 return FORMAT_D32F_COMPLEMENTARY; 3650 } 3651 else 3652 { 3653 return FORMAT_D32F; 3654 } 3655 case FORMAT_D32F: return FORMAT_D32F; 3656 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 3657 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 3658 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 3659 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 3660 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 3661 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 3662 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 3663 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 3664 default: 3665 ASSERT(false); 3666 } 3667 3668 return FORMAT_NULL; 3669 } 3670 3671 void Surface::setTexturePalette(unsigned int *palette) 3672 { 3673 Surface::palette = palette; 3674 Surface::paletteID++; 3675 } 3676 3677 void Surface::resolve() 3678 { 3679 if(internal.depth <= 1 || !internal.dirty || 
!renderTarget || internal.format == FORMAT_NULL) 3680 { 3681 return; 3682 } 3683 3684 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 3685 3686 int quality = internal.depth; 3687 int width = internal.width; 3688 int height = internal.height; 3689 int pitch = internal.pitchB; 3690 int slice = internal.sliceB; 3691 3692 unsigned char *source0 = (unsigned char*)source; 3693 unsigned char *source1 = source0 + slice; 3694 unsigned char *source2 = source1 + slice; 3695 unsigned char *source3 = source2 + slice; 3696 unsigned char *source4 = source3 + slice; 3697 unsigned char *source5 = source4 + slice; 3698 unsigned char *source6 = source5 + slice; 3699 unsigned char *source7 = source6 + slice; 3700 unsigned char *source8 = source7 + slice; 3701 unsigned char *source9 = source8 + slice; 3702 unsigned char *sourceA = source9 + slice; 3703 unsigned char *sourceB = sourceA + slice; 3704 unsigned char *sourceC = sourceB + slice; 3705 unsigned char *sourceD = sourceC + slice; 3706 unsigned char *sourceE = sourceD + slice; 3707 unsigned char *sourceF = sourceE + slice; 3708 3709 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8) 3710 { 3711 if(CPUID::supportsSSE2() && (width % 4) == 0) 3712 { 3713 if(internal.depth == 2) 3714 { 3715 for(int y = 0; y < height; y++) 3716 { 3717 for(int x = 0; x < width; x += 4) 3718 { 3719 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3720 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3721 3722 c0 = _mm_avg_epu8(c0, c1); 3723 3724 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3725 } 3726 3727 source0 += pitch; 3728 source1 += pitch; 3729 } 3730 } 3731 else if(internal.depth == 4) 3732 { 3733 for(int y = 0; y < height; y++) 3734 { 3735 for(int x = 0; x < width; x += 4) 3736 { 3737 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3738 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3739 
__m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3740 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3741 3742 c0 = _mm_avg_epu8(c0, c1); 3743 c2 = _mm_avg_epu8(c2, c3); 3744 c0 = _mm_avg_epu8(c0, c2); 3745 3746 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3747 } 3748 3749 source0 += pitch; 3750 source1 += pitch; 3751 source2 += pitch; 3752 source3 += pitch; 3753 } 3754 } 3755 else if(internal.depth == 8) 3756 { 3757 for(int y = 0; y < height; y++) 3758 { 3759 for(int x = 0; x < width; x += 4) 3760 { 3761 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3762 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3763 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3764 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3765 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3766 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3767 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3768 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3769 3770 c0 = _mm_avg_epu8(c0, c1); 3771 c2 = _mm_avg_epu8(c2, c3); 3772 c4 = _mm_avg_epu8(c4, c5); 3773 c6 = _mm_avg_epu8(c6, c7); 3774 c0 = _mm_avg_epu8(c0, c2); 3775 c4 = _mm_avg_epu8(c4, c6); 3776 c0 = _mm_avg_epu8(c0, c4); 3777 3778 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3779 } 3780 3781 source0 += pitch; 3782 source1 += pitch; 3783 source2 += pitch; 3784 source3 += pitch; 3785 source4 += pitch; 3786 source5 += pitch; 3787 source6 += pitch; 3788 source7 += pitch; 3789 } 3790 } 3791 else if(internal.depth == 16) 3792 { 3793 for(int y = 0; y < height; y++) 3794 { 3795 for(int x = 0; x < width; x += 4) 3796 { 3797 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3798 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3799 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3800 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3801 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3802 __m128i c5 = 
_mm_load_si128((__m128i*)(source5 + 4 * x)); 3803 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3804 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3805 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 3806 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 3807 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 3808 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 3809 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 3810 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 3811 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 3812 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 3813 3814 c0 = _mm_avg_epu8(c0, c1); 3815 c2 = _mm_avg_epu8(c2, c3); 3816 c4 = _mm_avg_epu8(c4, c5); 3817 c6 = _mm_avg_epu8(c6, c7); 3818 c8 = _mm_avg_epu8(c8, c9); 3819 cA = _mm_avg_epu8(cA, cB); 3820 cC = _mm_avg_epu8(cC, cD); 3821 cE = _mm_avg_epu8(cE, cF); 3822 c0 = _mm_avg_epu8(c0, c2); 3823 c4 = _mm_avg_epu8(c4, c6); 3824 c8 = _mm_avg_epu8(c8, cA); 3825 cC = _mm_avg_epu8(cC, cE); 3826 c0 = _mm_avg_epu8(c0, c4); 3827 c8 = _mm_avg_epu8(c8, cC); 3828 c0 = _mm_avg_epu8(c0, c8); 3829 3830 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3831 } 3832 3833 source0 += pitch; 3834 source1 += pitch; 3835 source2 += pitch; 3836 source3 += pitch; 3837 source4 += pitch; 3838 source5 += pitch; 3839 source6 += pitch; 3840 source7 += pitch; 3841 source8 += pitch; 3842 source9 += pitch; 3843 sourceA += pitch; 3844 sourceB += pitch; 3845 sourceC += pitch; 3846 sourceD += pitch; 3847 sourceE += pitch; 3848 sourceF += pitch; 3849 } 3850 } 3851 else ASSERT(false); 3852 } 3853 else 3854 { 3855 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 3856 3857 if(internal.depth == 2) 3858 { 3859 for(int y = 0; y < height; y++) 3860 { 3861 for(int x = 0; x < width; x++) 3862 { 3863 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3864 unsigned int c1 = *(unsigned 
int*)(source1 + 4 * x); 3865 3866 c0 = AVERAGE(c0, c1); 3867 3868 *(unsigned int*)(source0 + 4 * x) = c0; 3869 } 3870 3871 source0 += pitch; 3872 source1 += pitch; 3873 } 3874 } 3875 else if(internal.depth == 4) 3876 { 3877 for(int y = 0; y < height; y++) 3878 { 3879 for(int x = 0; x < width; x++) 3880 { 3881 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3882 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3883 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3884 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3885 3886 c0 = AVERAGE(c0, c1); 3887 c2 = AVERAGE(c2, c3); 3888 c0 = AVERAGE(c0, c2); 3889 3890 *(unsigned int*)(source0 + 4 * x) = c0; 3891 } 3892 3893 source0 += pitch; 3894 source1 += pitch; 3895 source2 += pitch; 3896 source3 += pitch; 3897 } 3898 } 3899 else if(internal.depth == 8) 3900 { 3901 for(int y = 0; y < height; y++) 3902 { 3903 for(int x = 0; x < width; x++) 3904 { 3905 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3906 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3907 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3908 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3909 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3910 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3911 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3912 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3913 3914 c0 = AVERAGE(c0, c1); 3915 c2 = AVERAGE(c2, c3); 3916 c4 = AVERAGE(c4, c5); 3917 c6 = AVERAGE(c6, c7); 3918 c0 = AVERAGE(c0, c2); 3919 c4 = AVERAGE(c4, c6); 3920 c0 = AVERAGE(c0, c4); 3921 3922 *(unsigned int*)(source0 + 4 * x) = c0; 3923 } 3924 3925 source0 += pitch; 3926 source1 += pitch; 3927 source2 += pitch; 3928 source3 += pitch; 3929 source4 += pitch; 3930 source5 += pitch; 3931 source6 += pitch; 3932 source7 += pitch; 3933 } 3934 } 3935 else if(internal.depth == 16) 3936 { 3937 for(int y = 0; y < height; y++) 3938 { 3939 for(int x = 0; x < width; x++) 3940 { 3941 unsigned int c0 = 
*(unsigned int*)(source0 + 4 * x); 3942 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3943 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3944 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3945 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3946 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3947 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3948 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3949 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 3950 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 3951 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 3952 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 3953 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 3954 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 3955 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 3956 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 3957 3958 c0 = AVERAGE(c0, c1); 3959 c2 = AVERAGE(c2, c3); 3960 c4 = AVERAGE(c4, c5); 3961 c6 = AVERAGE(c6, c7); 3962 c8 = AVERAGE(c8, c9); 3963 cA = AVERAGE(cA, cB); 3964 cC = AVERAGE(cC, cD); 3965 cE = AVERAGE(cE, cF); 3966 c0 = AVERAGE(c0, c2); 3967 c4 = AVERAGE(c4, c6); 3968 c8 = AVERAGE(c8, cA); 3969 cC = AVERAGE(cC, cE); 3970 c0 = AVERAGE(c0, c4); 3971 c8 = AVERAGE(c8, cC); 3972 c0 = AVERAGE(c0, c8); 3973 3974 *(unsigned int*)(source0 + 4 * x) = c0; 3975 } 3976 3977 source0 += pitch; 3978 source1 += pitch; 3979 source2 += pitch; 3980 source3 += pitch; 3981 source4 += pitch; 3982 source5 += pitch; 3983 source6 += pitch; 3984 source7 += pitch; 3985 source8 += pitch; 3986 source9 += pitch; 3987 sourceA += pitch; 3988 sourceB += pitch; 3989 sourceC += pitch; 3990 sourceD += pitch; 3991 sourceE += pitch; 3992 sourceF += pitch; 3993 } 3994 } 3995 else ASSERT(false); 3996 3997 #undef AVERAGE 3998 } 3999 } 4000 else if(internal.format == FORMAT_G16R16) 4001 { 4002 if(CPUID::supportsSSE2() && (width % 4) == 0) 4003 { 4004 if(internal.depth == 2) 4005 { 4006 for(int y = 0; y < 
height; y++) 4007 { 4008 for(int x = 0; x < width; x += 4) 4009 { 4010 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4011 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4012 4013 c0 = _mm_avg_epu16(c0, c1); 4014 4015 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4016 } 4017 4018 source0 += pitch; 4019 source1 += pitch; 4020 } 4021 } 4022 else if(internal.depth == 4) 4023 { 4024 for(int y = 0; y < height; y++) 4025 { 4026 for(int x = 0; x < width; x += 4) 4027 { 4028 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4029 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4030 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4031 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4032 4033 c0 = _mm_avg_epu16(c0, c1); 4034 c2 = _mm_avg_epu16(c2, c3); 4035 c0 = _mm_avg_epu16(c0, c2); 4036 4037 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4038 } 4039 4040 source0 += pitch; 4041 source1 += pitch; 4042 source2 += pitch; 4043 source3 += pitch; 4044 } 4045 } 4046 else if(internal.depth == 8) 4047 { 4048 for(int y = 0; y < height; y++) 4049 { 4050 for(int x = 0; x < width; x += 4) 4051 { 4052 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4053 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4054 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4055 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4056 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4057 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4058 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4059 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4060 4061 c0 = _mm_avg_epu16(c0, c1); 4062 c2 = _mm_avg_epu16(c2, c3); 4063 c4 = _mm_avg_epu16(c4, c5); 4064 c6 = _mm_avg_epu16(c6, c7); 4065 c0 = _mm_avg_epu16(c0, c2); 4066 c4 = _mm_avg_epu16(c4, c6); 4067 c0 = _mm_avg_epu16(c0, c4); 4068 4069 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4070 } 4071 4072 source0 += pitch; 4073 
source1 += pitch; 4074 source2 += pitch; 4075 source3 += pitch; 4076 source4 += pitch; 4077 source5 += pitch; 4078 source6 += pitch; 4079 source7 += pitch; 4080 } 4081 } 4082 else if(internal.depth == 16) 4083 { 4084 for(int y = 0; y < height; y++) 4085 { 4086 for(int x = 0; x < width; x += 4) 4087 { 4088 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4089 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4090 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4091 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4092 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4093 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4094 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4095 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4096 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4097 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4098 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4099 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4100 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4101 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4102 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4103 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4104 4105 c0 = _mm_avg_epu16(c0, c1); 4106 c2 = _mm_avg_epu16(c2, c3); 4107 c4 = _mm_avg_epu16(c4, c5); 4108 c6 = _mm_avg_epu16(c6, c7); 4109 c8 = _mm_avg_epu16(c8, c9); 4110 cA = _mm_avg_epu16(cA, cB); 4111 cC = _mm_avg_epu16(cC, cD); 4112 cE = _mm_avg_epu16(cE, cF); 4113 c0 = _mm_avg_epu16(c0, c2); 4114 c4 = _mm_avg_epu16(c4, c6); 4115 c8 = _mm_avg_epu16(c8, cA); 4116 cC = _mm_avg_epu16(cC, cE); 4117 c0 = _mm_avg_epu16(c0, c4); 4118 c8 = _mm_avg_epu16(c8, cC); 4119 c0 = _mm_avg_epu16(c0, c8); 4120 4121 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4122 } 4123 4124 source0 += pitch; 4125 source1 += pitch; 4126 source2 += pitch; 4127 source3 += pitch; 4128 source4 += pitch; 
4129 source5 += pitch; 4130 source6 += pitch; 4131 source7 += pitch; 4132 source8 += pitch; 4133 source9 += pitch; 4134 sourceA += pitch; 4135 sourceB += pitch; 4136 sourceC += pitch; 4137 sourceD += pitch; 4138 sourceE += pitch; 4139 sourceF += pitch; 4140 } 4141 } 4142 else ASSERT(false); 4143 } 4144 else 4145 { 4146 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4147 4148 if(internal.depth == 2) 4149 { 4150 for(int y = 0; y < height; y++) 4151 { 4152 for(int x = 0; x < width; x++) 4153 { 4154 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4155 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4156 4157 c0 = AVERAGE(c0, c1); 4158 4159 *(unsigned int*)(source0 + 4 * x) = c0; 4160 } 4161 4162 source0 += pitch; 4163 source1 += pitch; 4164 } 4165 } 4166 else if(internal.depth == 4) 4167 { 4168 for(int y = 0; y < height; y++) 4169 { 4170 for(int x = 0; x < width; x++) 4171 { 4172 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4173 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4174 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4175 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4176 4177 c0 = AVERAGE(c0, c1); 4178 c2 = AVERAGE(c2, c3); 4179 c0 = AVERAGE(c0, c2); 4180 4181 *(unsigned int*)(source0 + 4 * x) = c0; 4182 } 4183 4184 source0 += pitch; 4185 source1 += pitch; 4186 source2 += pitch; 4187 source3 += pitch; 4188 } 4189 } 4190 else if(internal.depth == 8) 4191 { 4192 for(int y = 0; y < height; y++) 4193 { 4194 for(int x = 0; x < width; x++) 4195 { 4196 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4197 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4198 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4199 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4200 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4201 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4202 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4203 unsigned int c7 = 
*(unsigned int*)(source7 + 4 * x); 4204 4205 c0 = AVERAGE(c0, c1); 4206 c2 = AVERAGE(c2, c3); 4207 c4 = AVERAGE(c4, c5); 4208 c6 = AVERAGE(c6, c7); 4209 c0 = AVERAGE(c0, c2); 4210 c4 = AVERAGE(c4, c6); 4211 c0 = AVERAGE(c0, c4); 4212 4213 *(unsigned int*)(source0 + 4 * x) = c0; 4214 } 4215 4216 source0 += pitch; 4217 source1 += pitch; 4218 source2 += pitch; 4219 source3 += pitch; 4220 source4 += pitch; 4221 source5 += pitch; 4222 source6 += pitch; 4223 source7 += pitch; 4224 } 4225 } 4226 else if(internal.depth == 16) 4227 { 4228 for(int y = 0; y < height; y++) 4229 { 4230 for(int x = 0; x < width; x++) 4231 { 4232 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4233 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4234 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4235 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4236 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4237 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4238 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4239 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4240 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4241 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4242 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4243 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4244 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4245 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4246 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4247 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4248 4249 c0 = AVERAGE(c0, c1); 4250 c2 = AVERAGE(c2, c3); 4251 c4 = AVERAGE(c4, c5); 4252 c6 = AVERAGE(c6, c7); 4253 c8 = AVERAGE(c8, c9); 4254 cA = AVERAGE(cA, cB); 4255 cC = AVERAGE(cC, cD); 4256 cE = AVERAGE(cE, cF); 4257 c0 = AVERAGE(c0, c2); 4258 c4 = AVERAGE(c4, c6); 4259 c8 = AVERAGE(c8, cA); 4260 cC = AVERAGE(cC, cE); 4261 c0 = AVERAGE(c0, c4); 4262 c8 = AVERAGE(c8, cC); 4263 c0 = AVERAGE(c0, c8); 4264 4265 *(unsigned int*)(source0 + 4 * x) 
= c0; 4266 } 4267 4268 source0 += pitch; 4269 source1 += pitch; 4270 source2 += pitch; 4271 source3 += pitch; 4272 source4 += pitch; 4273 source5 += pitch; 4274 source6 += pitch; 4275 source7 += pitch; 4276 source8 += pitch; 4277 source9 += pitch; 4278 sourceA += pitch; 4279 sourceB += pitch; 4280 sourceC += pitch; 4281 sourceD += pitch; 4282 sourceE += pitch; 4283 sourceF += pitch; 4284 } 4285 } 4286 else ASSERT(false); 4287 4288 #undef AVERAGE 4289 } 4290 } 4291 else if(internal.format == FORMAT_A16B16G16R16) 4292 { 4293 if(CPUID::supportsSSE2() && (width % 2) == 0) 4294 { 4295 if(internal.depth == 2) 4296 { 4297 for(int y = 0; y < height; y++) 4298 { 4299 for(int x = 0; x < width; x += 2) 4300 { 4301 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4302 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4303 4304 c0 = _mm_avg_epu16(c0, c1); 4305 4306 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4307 } 4308 4309 source0 += pitch; 4310 source1 += pitch; 4311 } 4312 } 4313 else if(internal.depth == 4) 4314 { 4315 for(int y = 0; y < height; y++) 4316 { 4317 for(int x = 0; x < width; x += 2) 4318 { 4319 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4320 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4321 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4322 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4323 4324 c0 = _mm_avg_epu16(c0, c1); 4325 c2 = _mm_avg_epu16(c2, c3); 4326 c0 = _mm_avg_epu16(c0, c2); 4327 4328 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4329 } 4330 4331 source0 += pitch; 4332 source1 += pitch; 4333 source2 += pitch; 4334 source3 += pitch; 4335 } 4336 } 4337 else if(internal.depth == 8) 4338 { 4339 for(int y = 0; y < height; y++) 4340 { 4341 for(int x = 0; x < width; x += 2) 4342 { 4343 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4344 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4345 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4346 
__m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4347 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4348 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4349 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4350 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4351 4352 c0 = _mm_avg_epu16(c0, c1); 4353 c2 = _mm_avg_epu16(c2, c3); 4354 c4 = _mm_avg_epu16(c4, c5); 4355 c6 = _mm_avg_epu16(c6, c7); 4356 c0 = _mm_avg_epu16(c0, c2); 4357 c4 = _mm_avg_epu16(c4, c6); 4358 c0 = _mm_avg_epu16(c0, c4); 4359 4360 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4361 } 4362 4363 source0 += pitch; 4364 source1 += pitch; 4365 source2 += pitch; 4366 source3 += pitch; 4367 source4 += pitch; 4368 source5 += pitch; 4369 source6 += pitch; 4370 source7 += pitch; 4371 } 4372 } 4373 else if(internal.depth == 16) 4374 { 4375 for(int y = 0; y < height; y++) 4376 { 4377 for(int x = 0; x < width; x += 2) 4378 { 4379 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4380 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4381 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4382 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4383 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4384 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4385 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4386 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4387 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4388 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4389 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4390 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4391 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4392 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4393 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4394 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4395 4396 c0 = _mm_avg_epu16(c0, c1); 4397 
c2 = _mm_avg_epu16(c2, c3); 4398 c4 = _mm_avg_epu16(c4, c5); 4399 c6 = _mm_avg_epu16(c6, c7); 4400 c8 = _mm_avg_epu16(c8, c9); 4401 cA = _mm_avg_epu16(cA, cB); 4402 cC = _mm_avg_epu16(cC, cD); 4403 cE = _mm_avg_epu16(cE, cF); 4404 c0 = _mm_avg_epu16(c0, c2); 4405 c4 = _mm_avg_epu16(c4, c6); 4406 c8 = _mm_avg_epu16(c8, cA); 4407 cC = _mm_avg_epu16(cC, cE); 4408 c0 = _mm_avg_epu16(c0, c4); 4409 c8 = _mm_avg_epu16(c8, cC); 4410 c0 = _mm_avg_epu16(c0, c8); 4411 4412 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4413 } 4414 4415 source0 += pitch; 4416 source1 += pitch; 4417 source2 += pitch; 4418 source3 += pitch; 4419 source4 += pitch; 4420 source5 += pitch; 4421 source6 += pitch; 4422 source7 += pitch; 4423 source8 += pitch; 4424 source9 += pitch; 4425 sourceA += pitch; 4426 sourceB += pitch; 4427 sourceC += pitch; 4428 sourceD += pitch; 4429 sourceE += pitch; 4430 sourceF += pitch; 4431 } 4432 } 4433 else ASSERT(false); 4434 } 4435 else 4436 { 4437 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4438 4439 if(internal.depth == 2) 4440 { 4441 for(int y = 0; y < height; y++) 4442 { 4443 for(int x = 0; x < 2 * width; x++) 4444 { 4445 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4446 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4447 4448 c0 = AVERAGE(c0, c1); 4449 4450 *(unsigned int*)(source0 + 4 * x) = c0; 4451 } 4452 4453 source0 += pitch; 4454 source1 += pitch; 4455 } 4456 } 4457 else if(internal.depth == 4) 4458 { 4459 for(int y = 0; y < height; y++) 4460 { 4461 for(int x = 0; x < 2 * width; x++) 4462 { 4463 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4464 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4465 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4466 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4467 4468 c0 = AVERAGE(c0, c1); 4469 c2 = AVERAGE(c2, c3); 4470 c0 = AVERAGE(c0, c2); 4471 4472 *(unsigned int*)(source0 + 4 * x) = c0; 4473 } 4474 4475 source0 += 
pitch; 4476 source1 += pitch; 4477 source2 += pitch; 4478 source3 += pitch; 4479 } 4480 } 4481 else if(internal.depth == 8) 4482 { 4483 for(int y = 0; y < height; y++) 4484 { 4485 for(int x = 0; x < 2 * width; x++) 4486 { 4487 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4488 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4489 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4490 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4491 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4492 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4493 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4494 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4495 4496 c0 = AVERAGE(c0, c1); 4497 c2 = AVERAGE(c2, c3); 4498 c4 = AVERAGE(c4, c5); 4499 c6 = AVERAGE(c6, c7); 4500 c0 = AVERAGE(c0, c2); 4501 c4 = AVERAGE(c4, c6); 4502 c0 = AVERAGE(c0, c4); 4503 4504 *(unsigned int*)(source0 + 4 * x) = c0; 4505 } 4506 4507 source0 += pitch; 4508 source1 += pitch; 4509 source2 += pitch; 4510 source3 += pitch; 4511 source4 += pitch; 4512 source5 += pitch; 4513 source6 += pitch; 4514 source7 += pitch; 4515 } 4516 } 4517 else if(internal.depth == 16) 4518 { 4519 for(int y = 0; y < height; y++) 4520 { 4521 for(int x = 0; x < 2 * width; x++) 4522 { 4523 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4524 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4525 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4526 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4527 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4528 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4529 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4530 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4531 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4532 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4533 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4534 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4535 unsigned int cC = 
*(unsigned int*)(sourceC + 4 * x); 4536 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4537 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4538 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4539 4540 c0 = AVERAGE(c0, c1); 4541 c2 = AVERAGE(c2, c3); 4542 c4 = AVERAGE(c4, c5); 4543 c6 = AVERAGE(c6, c7); 4544 c8 = AVERAGE(c8, c9); 4545 cA = AVERAGE(cA, cB); 4546 cC = AVERAGE(cC, cD); 4547 cE = AVERAGE(cE, cF); 4548 c0 = AVERAGE(c0, c2); 4549 c4 = AVERAGE(c4, c6); 4550 c8 = AVERAGE(c8, cA); 4551 cC = AVERAGE(cC, cE); 4552 c0 = AVERAGE(c0, c4); 4553 c8 = AVERAGE(c8, cC); 4554 c0 = AVERAGE(c0, c8); 4555 4556 *(unsigned int*)(source0 + 4 * x) = c0; 4557 } 4558 4559 source0 += pitch; 4560 source1 += pitch; 4561 source2 += pitch; 4562 source3 += pitch; 4563 source4 += pitch; 4564 source5 += pitch; 4565 source6 += pitch; 4566 source7 += pitch; 4567 source8 += pitch; 4568 source9 += pitch; 4569 sourceA += pitch; 4570 sourceB += pitch; 4571 sourceC += pitch; 4572 sourceD += pitch; 4573 sourceE += pitch; 4574 sourceF += pitch; 4575 } 4576 } 4577 else ASSERT(false); 4578 4579 #undef AVERAGE 4580 } 4581 } 4582 else if(internal.format == FORMAT_R32F) 4583 { 4584 if(CPUID::supportsSSE() && (width % 4) == 0) 4585 { 4586 if(internal.depth == 2) 4587 { 4588 for(int y = 0; y < height; y++) 4589 { 4590 for(int x = 0; x < width; x += 4) 4591 { 4592 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4593 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4594 4595 c0 = _mm_add_ps(c0, c1); 4596 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4597 4598 _mm_store_ps((float*)(source0 + 4 * x), c0); 4599 } 4600 4601 source0 += pitch; 4602 source1 += pitch; 4603 } 4604 } 4605 else if(internal.depth == 4) 4606 { 4607 for(int y = 0; y < height; y++) 4608 { 4609 for(int x = 0; x < width; x += 4) 4610 { 4611 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4612 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4613 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4614 
__m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4615 4616 c0 = _mm_add_ps(c0, c1); 4617 c2 = _mm_add_ps(c2, c3); 4618 c0 = _mm_add_ps(c0, c2); 4619 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4620 4621 _mm_store_ps((float*)(source0 + 4 * x), c0); 4622 } 4623 4624 source0 += pitch; 4625 source1 += pitch; 4626 source2 += pitch; 4627 source3 += pitch; 4628 } 4629 } 4630 else if(internal.depth == 8) 4631 { 4632 for(int y = 0; y < height; y++) 4633 { 4634 for(int x = 0; x < width; x += 4) 4635 { 4636 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4637 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4638 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4639 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4640 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4641 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4642 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4643 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4644 4645 c0 = _mm_add_ps(c0, c1); 4646 c2 = _mm_add_ps(c2, c3); 4647 c4 = _mm_add_ps(c4, c5); 4648 c6 = _mm_add_ps(c6, c7); 4649 c0 = _mm_add_ps(c0, c2); 4650 c4 = _mm_add_ps(c4, c6); 4651 c0 = _mm_add_ps(c0, c4); 4652 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4653 4654 _mm_store_ps((float*)(source0 + 4 * x), c0); 4655 } 4656 4657 source0 += pitch; 4658 source1 += pitch; 4659 source2 += pitch; 4660 source3 += pitch; 4661 source4 += pitch; 4662 source5 += pitch; 4663 source6 += pitch; 4664 source7 += pitch; 4665 } 4666 } 4667 else if(internal.depth == 16) 4668 { 4669 for(int y = 0; y < height; y++) 4670 { 4671 for(int x = 0; x < width; x += 4) 4672 { 4673 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4674 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4675 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4676 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4677 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4678 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4679 __m128 c6 = 
_mm_load_ps((float*)(source6 + 4 * x)); 4680 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4681 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 4682 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 4683 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 4684 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 4685 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 4686 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 4687 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 4688 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 4689 4690 c0 = _mm_add_ps(c0, c1); 4691 c2 = _mm_add_ps(c2, c3); 4692 c4 = _mm_add_ps(c4, c5); 4693 c6 = _mm_add_ps(c6, c7); 4694 c8 = _mm_add_ps(c8, c9); 4695 cA = _mm_add_ps(cA, cB); 4696 cC = _mm_add_ps(cC, cD); 4697 cE = _mm_add_ps(cE, cF); 4698 c0 = _mm_add_ps(c0, c2); 4699 c4 = _mm_add_ps(c4, c6); 4700 c8 = _mm_add_ps(c8, cA); 4701 cC = _mm_add_ps(cC, cE); 4702 c0 = _mm_add_ps(c0, c4); 4703 c8 = _mm_add_ps(c8, cC); 4704 c0 = _mm_add_ps(c0, c8); 4705 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4706 4707 _mm_store_ps((float*)(source0 + 4 * x), c0); 4708 } 4709 4710 source0 += pitch; 4711 source1 += pitch; 4712 source2 += pitch; 4713 source3 += pitch; 4714 source4 += pitch; 4715 source5 += pitch; 4716 source6 += pitch; 4717 source7 += pitch; 4718 source8 += pitch; 4719 source9 += pitch; 4720 sourceA += pitch; 4721 sourceB += pitch; 4722 sourceC += pitch; 4723 sourceD += pitch; 4724 sourceE += pitch; 4725 sourceF += pitch; 4726 } 4727 } 4728 else ASSERT(false); 4729 } 4730 else 4731 { 4732 if(internal.depth == 2) 4733 { 4734 for(int y = 0; y < height; y++) 4735 { 4736 for(int x = 0; x < width; x++) 4737 { 4738 float c0 = *(float*)(source0 + 4 * x); 4739 float c1 = *(float*)(source1 + 4 * x); 4740 4741 c0 = c0 + c1; 4742 c0 *= 1.0f / 2.0f; 4743 4744 *(float*)(source0 + 4 * x) = c0; 4745 } 4746 4747 source0 += pitch; 4748 source1 += pitch; 4749 } 4750 } 4751 else if(internal.depth == 4) 4752 { 4753 for(int y = 0; y < 
height; y++) 4754 { 4755 for(int x = 0; x < width; x++) 4756 { 4757 float c0 = *(float*)(source0 + 4 * x); 4758 float c1 = *(float*)(source1 + 4 * x); 4759 float c2 = *(float*)(source2 + 4 * x); 4760 float c3 = *(float*)(source3 + 4 * x); 4761 4762 c0 = c0 + c1; 4763 c2 = c2 + c3; 4764 c0 = c0 + c2; 4765 c0 *= 1.0f / 4.0f; 4766 4767 *(float*)(source0 + 4 * x) = c0; 4768 } 4769 4770 source0 += pitch; 4771 source1 += pitch; 4772 source2 += pitch; 4773 source3 += pitch; 4774 } 4775 } 4776 else if(internal.depth == 8) 4777 { 4778 for(int y = 0; y < height; y++) 4779 { 4780 for(int x = 0; x < width; x++) 4781 { 4782 float c0 = *(float*)(source0 + 4 * x); 4783 float c1 = *(float*)(source1 + 4 * x); 4784 float c2 = *(float*)(source2 + 4 * x); 4785 float c3 = *(float*)(source3 + 4 * x); 4786 float c4 = *(float*)(source4 + 4 * x); 4787 float c5 = *(float*)(source5 + 4 * x); 4788 float c6 = *(float*)(source6 + 4 * x); 4789 float c7 = *(float*)(source7 + 4 * x); 4790 4791 c0 = c0 + c1; 4792 c2 = c2 + c3; 4793 c4 = c4 + c5; 4794 c6 = c6 + c7; 4795 c0 = c0 + c2; 4796 c4 = c4 + c6; 4797 c0 = c0 + c4; 4798 c0 *= 1.0f / 8.0f; 4799 4800 *(float*)(source0 + 4 * x) = c0; 4801 } 4802 4803 source0 += pitch; 4804 source1 += pitch; 4805 source2 += pitch; 4806 source3 += pitch; 4807 source4 += pitch; 4808 source5 += pitch; 4809 source6 += pitch; 4810 source7 += pitch; 4811 } 4812 } 4813 else if(internal.depth == 16) 4814 { 4815 for(int y = 0; y < height; y++) 4816 { 4817 for(int x = 0; x < width; x++) 4818 { 4819 float c0 = *(float*)(source0 + 4 * x); 4820 float c1 = *(float*)(source1 + 4 * x); 4821 float c2 = *(float*)(source2 + 4 * x); 4822 float c3 = *(float*)(source3 + 4 * x); 4823 float c4 = *(float*)(source4 + 4 * x); 4824 float c5 = *(float*)(source5 + 4 * x); 4825 float c6 = *(float*)(source6 + 4 * x); 4826 float c7 = *(float*)(source7 + 4 * x); 4827 float c8 = *(float*)(source8 + 4 * x); 4828 float c9 = *(float*)(source9 + 4 * x); 4829 float cA = *(float*)(sourceA + 4 * x); 4830 
float cB = *(float*)(sourceB + 4 * x); 4831 float cC = *(float*)(sourceC + 4 * x); 4832 float cD = *(float*)(sourceD + 4 * x); 4833 float cE = *(float*)(sourceE + 4 * x); 4834 float cF = *(float*)(sourceF + 4 * x); 4835 4836 c0 = c0 + c1; 4837 c2 = c2 + c3; 4838 c4 = c4 + c5; 4839 c6 = c6 + c7; 4840 c8 = c8 + c9; 4841 cA = cA + cB; 4842 cC = cC + cD; 4843 cE = cE + cF; 4844 c0 = c0 + c2; 4845 c4 = c4 + c6; 4846 c8 = c8 + cA; 4847 cC = cC + cE; 4848 c0 = c0 + c4; 4849 c8 = c8 + cC; 4850 c0 = c0 + c8; 4851 c0 *= 1.0f / 16.0f; 4852 4853 *(float*)(source0 + 4 * x) = c0; 4854 } 4855 4856 source0 += pitch; 4857 source1 += pitch; 4858 source2 += pitch; 4859 source3 += pitch; 4860 source4 += pitch; 4861 source5 += pitch; 4862 source6 += pitch; 4863 source7 += pitch; 4864 source8 += pitch; 4865 source9 += pitch; 4866 sourceA += pitch; 4867 sourceB += pitch; 4868 sourceC += pitch; 4869 sourceD += pitch; 4870 sourceE += pitch; 4871 sourceF += pitch; 4872 } 4873 } 4874 else ASSERT(false); 4875 } 4876 } 4877 else if(internal.format == FORMAT_G32R32F) 4878 { 4879 if(CPUID::supportsSSE() && (width % 2) == 0) 4880 { 4881 if(internal.depth == 2) 4882 { 4883 for(int y = 0; y < height; y++) 4884 { 4885 for(int x = 0; x < width; x += 2) 4886 { 4887 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4888 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4889 4890 c0 = _mm_add_ps(c0, c1); 4891 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4892 4893 _mm_store_ps((float*)(source0 + 8 * x), c0); 4894 } 4895 4896 source0 += pitch; 4897 source1 += pitch; 4898 } 4899 } 4900 else if(internal.depth == 4) 4901 { 4902 for(int y = 0; y < height; y++) 4903 { 4904 for(int x = 0; x < width; x += 2) 4905 { 4906 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4907 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4908 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4909 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4910 4911 c0 = _mm_add_ps(c0, c1); 4912 c2 = _mm_add_ps(c2, c3); 
4913 c0 = _mm_add_ps(c0, c2); 4914 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4915 4916 _mm_store_ps((float*)(source0 + 8 * x), c0); 4917 } 4918 4919 source0 += pitch; 4920 source1 += pitch; 4921 source2 += pitch; 4922 source3 += pitch; 4923 } 4924 } 4925 else if(internal.depth == 8) 4926 { 4927 for(int y = 0; y < height; y++) 4928 { 4929 for(int x = 0; x < width; x += 2) 4930 { 4931 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4932 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4933 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4934 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4935 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4936 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4937 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4938 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 4939 4940 c0 = _mm_add_ps(c0, c1); 4941 c2 = _mm_add_ps(c2, c3); 4942 c4 = _mm_add_ps(c4, c5); 4943 c6 = _mm_add_ps(c6, c7); 4944 c0 = _mm_add_ps(c0, c2); 4945 c4 = _mm_add_ps(c4, c6); 4946 c0 = _mm_add_ps(c0, c4); 4947 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4948 4949 _mm_store_ps((float*)(source0 + 8 * x), c0); 4950 } 4951 4952 source0 += pitch; 4953 source1 += pitch; 4954 source2 += pitch; 4955 source3 += pitch; 4956 source4 += pitch; 4957 source5 += pitch; 4958 source6 += pitch; 4959 source7 += pitch; 4960 } 4961 } 4962 else if(internal.depth == 16) 4963 { 4964 for(int y = 0; y < height; y++) 4965 { 4966 for(int x = 0; x < width; x += 2) 4967 { 4968 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4969 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4970 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4971 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4972 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4973 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4974 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4975 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 4976 __m128 c8 = 
_mm_load_ps((float*)(source8 + 8 * x)); 4977 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 4978 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 4979 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 4980 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 4981 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 4982 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 4983 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 4984 4985 c0 = _mm_add_ps(c0, c1); 4986 c2 = _mm_add_ps(c2, c3); 4987 c4 = _mm_add_ps(c4, c5); 4988 c6 = _mm_add_ps(c6, c7); 4989 c8 = _mm_add_ps(c8, c9); 4990 cA = _mm_add_ps(cA, cB); 4991 cC = _mm_add_ps(cC, cD); 4992 cE = _mm_add_ps(cE, cF); 4993 c0 = _mm_add_ps(c0, c2); 4994 c4 = _mm_add_ps(c4, c6); 4995 c8 = _mm_add_ps(c8, cA); 4996 cC = _mm_add_ps(cC, cE); 4997 c0 = _mm_add_ps(c0, c4); 4998 c8 = _mm_add_ps(c8, cC); 4999 c0 = _mm_add_ps(c0, c8); 5000 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5001 5002 _mm_store_ps((float*)(source0 + 8 * x), c0); 5003 } 5004 5005 source0 += pitch; 5006 source1 += pitch; 5007 source2 += pitch; 5008 source3 += pitch; 5009 source4 += pitch; 5010 source5 += pitch; 5011 source6 += pitch; 5012 source7 += pitch; 5013 source8 += pitch; 5014 source9 += pitch; 5015 sourceA += pitch; 5016 sourceB += pitch; 5017 sourceC += pitch; 5018 sourceD += pitch; 5019 sourceE += pitch; 5020 sourceF += pitch; 5021 } 5022 } 5023 else ASSERT(false); 5024 } 5025 else 5026 { 5027 if(internal.depth == 2) 5028 { 5029 for(int y = 0; y < height; y++) 5030 { 5031 for(int x = 0; x < 2 * width; x++) 5032 { 5033 float c0 = *(float*)(source0 + 4 * x); 5034 float c1 = *(float*)(source1 + 4 * x); 5035 5036 c0 = c0 + c1; 5037 c0 *= 1.0f / 2.0f; 5038 5039 *(float*)(source0 + 4 * x) = c0; 5040 } 5041 5042 source0 += pitch; 5043 source1 += pitch; 5044 } 5045 } 5046 else if(internal.depth == 4) 5047 { 5048 for(int y = 0; y < height; y++) 5049 { 5050 for(int x = 0; x < 2 * width; x++) 5051 { 5052 float c0 = *(float*)(source0 + 4 * x); 
5053 float c1 = *(float*)(source1 + 4 * x); 5054 float c2 = *(float*)(source2 + 4 * x); 5055 float c3 = *(float*)(source3 + 4 * x); 5056 5057 c0 = c0 + c1; 5058 c2 = c2 + c3; 5059 c0 = c0 + c2; 5060 c0 *= 1.0f / 4.0f; 5061 5062 *(float*)(source0 + 4 * x) = c0; 5063 } 5064 5065 source0 += pitch; 5066 source1 += pitch; 5067 source2 += pitch; 5068 source3 += pitch; 5069 } 5070 } 5071 else if(internal.depth == 8) 5072 { 5073 for(int y = 0; y < height; y++) 5074 { 5075 for(int x = 0; x < 2 * width; x++) 5076 { 5077 float c0 = *(float*)(source0 + 4 * x); 5078 float c1 = *(float*)(source1 + 4 * x); 5079 float c2 = *(float*)(source2 + 4 * x); 5080 float c3 = *(float*)(source3 + 4 * x); 5081 float c4 = *(float*)(source4 + 4 * x); 5082 float c5 = *(float*)(source5 + 4 * x); 5083 float c6 = *(float*)(source6 + 4 * x); 5084 float c7 = *(float*)(source7 + 4 * x); 5085 5086 c0 = c0 + c1; 5087 c2 = c2 + c3; 5088 c4 = c4 + c5; 5089 c6 = c6 + c7; 5090 c0 = c0 + c2; 5091 c4 = c4 + c6; 5092 c0 = c0 + c4; 5093 c0 *= 1.0f / 8.0f; 5094 5095 *(float*)(source0 + 4 * x) = c0; 5096 } 5097 5098 source0 += pitch; 5099 source1 += pitch; 5100 source2 += pitch; 5101 source3 += pitch; 5102 source4 += pitch; 5103 source5 += pitch; 5104 source6 += pitch; 5105 source7 += pitch; 5106 } 5107 } 5108 else if(internal.depth == 16) 5109 { 5110 for(int y = 0; y < height; y++) 5111 { 5112 for(int x = 0; x < 2 * width; x++) 5113 { 5114 float c0 = *(float*)(source0 + 4 * x); 5115 float c1 = *(float*)(source1 + 4 * x); 5116 float c2 = *(float*)(source2 + 4 * x); 5117 float c3 = *(float*)(source3 + 4 * x); 5118 float c4 = *(float*)(source4 + 4 * x); 5119 float c5 = *(float*)(source5 + 4 * x); 5120 float c6 = *(float*)(source6 + 4 * x); 5121 float c7 = *(float*)(source7 + 4 * x); 5122 float c8 = *(float*)(source8 + 4 * x); 5123 float c9 = *(float*)(source9 + 4 * x); 5124 float cA = *(float*)(sourceA + 4 * x); 5125 float cB = *(float*)(sourceB + 4 * x); 5126 float cC = *(float*)(sourceC + 4 * x); 5127 float cD = 
*(float*)(sourceD + 4 * x); 5128 float cE = *(float*)(sourceE + 4 * x); 5129 float cF = *(float*)(sourceF + 4 * x); 5130 5131 c0 = c0 + c1; 5132 c2 = c2 + c3; 5133 c4 = c4 + c5; 5134 c6 = c6 + c7; 5135 c8 = c8 + c9; 5136 cA = cA + cB; 5137 cC = cC + cD; 5138 cE = cE + cF; 5139 c0 = c0 + c2; 5140 c4 = c4 + c6; 5141 c8 = c8 + cA; 5142 cC = cC + cE; 5143 c0 = c0 + c4; 5144 c8 = c8 + cC; 5145 c0 = c0 + c8; 5146 c0 *= 1.0f / 16.0f; 5147 5148 *(float*)(source0 + 4 * x) = c0; 5149 } 5150 5151 source0 += pitch; 5152 source1 += pitch; 5153 source2 += pitch; 5154 source3 += pitch; 5155 source4 += pitch; 5156 source5 += pitch; 5157 source6 += pitch; 5158 source7 += pitch; 5159 source8 += pitch; 5160 source9 += pitch; 5161 sourceA += pitch; 5162 sourceB += pitch; 5163 sourceC += pitch; 5164 sourceD += pitch; 5165 sourceE += pitch; 5166 sourceF += pitch; 5167 } 5168 } 5169 else ASSERT(false); 5170 } 5171 } 5172 else if(internal.format == FORMAT_A32B32G32R32F) 5173 { 5174 if(CPUID::supportsSSE()) 5175 { 5176 if(internal.depth == 2) 5177 { 5178 for(int y = 0; y < height; y++) 5179 { 5180 for(int x = 0; x < width; x++) 5181 { 5182 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5183 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5184 5185 c0 = _mm_add_ps(c0, c1); 5186 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5187 5188 _mm_store_ps((float*)(source0 + 16 * x), c0); 5189 } 5190 5191 source0 += pitch; 5192 source1 += pitch; 5193 } 5194 } 5195 else if(internal.depth == 4) 5196 { 5197 for(int y = 0; y < height; y++) 5198 { 5199 for(int x = 0; x < width; x++) 5200 { 5201 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5202 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5203 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5204 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5205 5206 c0 = _mm_add_ps(c0, c1); 5207 c2 = _mm_add_ps(c2, c3); 5208 c0 = _mm_add_ps(c0, c2); 5209 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5210 5211 
_mm_store_ps((float*)(source0 + 16 * x), c0); 5212 } 5213 5214 source0 += pitch; 5215 source1 += pitch; 5216 source2 += pitch; 5217 source3 += pitch; 5218 } 5219 } 5220 else if(internal.depth == 8) 5221 { 5222 for(int y = 0; y < height; y++) 5223 { 5224 for(int x = 0; x < width; x++) 5225 { 5226 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5227 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5228 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5229 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5230 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5231 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5232 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5233 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5234 5235 c0 = _mm_add_ps(c0, c1); 5236 c2 = _mm_add_ps(c2, c3); 5237 c4 = _mm_add_ps(c4, c5); 5238 c6 = _mm_add_ps(c6, c7); 5239 c0 = _mm_add_ps(c0, c2); 5240 c4 = _mm_add_ps(c4, c6); 5241 c0 = _mm_add_ps(c0, c4); 5242 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5243 5244 _mm_store_ps((float*)(source0 + 16 * x), c0); 5245 } 5246 5247 source0 += pitch; 5248 source1 += pitch; 5249 source2 += pitch; 5250 source3 += pitch; 5251 source4 += pitch; 5252 source5 += pitch; 5253 source6 += pitch; 5254 source7 += pitch; 5255 } 5256 } 5257 else if(internal.depth == 16) 5258 { 5259 for(int y = 0; y < height; y++) 5260 { 5261 for(int x = 0; x < width; x++) 5262 { 5263 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5264 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5265 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5266 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5267 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5268 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5269 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5270 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5271 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5272 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 
5273 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5274 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x)); 5275 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x)); 5276 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x)); 5277 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x)); 5278 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x)); 5279 5280 c0 = _mm_add_ps(c0, c1); 5281 c2 = _mm_add_ps(c2, c3); 5282 c4 = _mm_add_ps(c4, c5); 5283 c6 = _mm_add_ps(c6, c7); 5284 c8 = _mm_add_ps(c8, c9); 5285 cA = _mm_add_ps(cA, cB); 5286 cC = _mm_add_ps(cC, cD); 5287 cE = _mm_add_ps(cE, cF); 5288 c0 = _mm_add_ps(c0, c2); 5289 c4 = _mm_add_ps(c4, c6); 5290 c8 = _mm_add_ps(c8, cA); 5291 cC = _mm_add_ps(cC, cE); 5292 c0 = _mm_add_ps(c0, c4); 5293 c8 = _mm_add_ps(c8, cC); 5294 c0 = _mm_add_ps(c0, c8); 5295 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5296 5297 _mm_store_ps((float*)(source0 + 16 * x), c0); 5298 } 5299 5300 source0 += pitch; 5301 source1 += pitch; 5302 source2 += pitch; 5303 source3 += pitch; 5304 source4 += pitch; 5305 source5 += pitch; 5306 source6 += pitch; 5307 source7 += pitch; 5308 source8 += pitch; 5309 source9 += pitch; 5310 sourceA += pitch; 5311 sourceB += pitch; 5312 sourceC += pitch; 5313 sourceD += pitch; 5314 sourceE += pitch; 5315 sourceF += pitch; 5316 } 5317 } 5318 else ASSERT(false); 5319 } 5320 else 5321 { 5322 if(internal.depth == 2) 5323 { 5324 for(int y = 0; y < height; y++) 5325 { 5326 for(int x = 0; x < 4 * width; x++) 5327 { 5328 float c0 = *(float*)(source0 + 4 * x); 5329 float c1 = *(float*)(source1 + 4 * x); 5330 5331 c0 = c0 + c1; 5332 c0 *= 1.0f / 2.0f; 5333 5334 *(float*)(source0 + 4 * x) = c0; 5335 } 5336 5337 source0 += pitch; 5338 source1 += pitch; 5339 } 5340 } 5341 else if(internal.depth == 4) 5342 { 5343 for(int y = 0; y < height; y++) 5344 { 5345 for(int x = 0; x < 4 * width; x++) 5346 { 5347 float c0 = *(float*)(source0 + 4 * x); 5348 float c1 = *(float*)(source1 + 4 * x); 5349 float c2 = *(float*)(source2 + 4 * x); 
5350 float c3 = *(float*)(source3 + 4 * x); 5351 5352 c0 = c0 + c1; 5353 c2 = c2 + c3; 5354 c0 = c0 + c2; 5355 c0 *= 1.0f / 4.0f; 5356 5357 *(float*)(source0 + 4 * x) = c0; 5358 } 5359 5360 source0 += pitch; 5361 source1 += pitch; 5362 source2 += pitch; 5363 source3 += pitch; 5364 } 5365 } 5366 else if(internal.depth == 8) 5367 { 5368 for(int y = 0; y < height; y++) 5369 { 5370 for(int x = 0; x < 4 * width; x++) 5371 { 5372 float c0 = *(float*)(source0 + 4 * x); 5373 float c1 = *(float*)(source1 + 4 * x); 5374 float c2 = *(float*)(source2 + 4 * x); 5375 float c3 = *(float*)(source3 + 4 * x); 5376 float c4 = *(float*)(source4 + 4 * x); 5377 float c5 = *(float*)(source5 + 4 * x); 5378 float c6 = *(float*)(source6 + 4 * x); 5379 float c7 = *(float*)(source7 + 4 * x); 5380 5381 c0 = c0 + c1; 5382 c2 = c2 + c3; 5383 c4 = c4 + c5; 5384 c6 = c6 + c7; 5385 c0 = c0 + c2; 5386 c4 = c4 + c6; 5387 c0 = c0 + c4; 5388 c0 *= 1.0f / 8.0f; 5389 5390 *(float*)(source0 + 4 * x) = c0; 5391 } 5392 5393 source0 += pitch; 5394 source1 += pitch; 5395 source2 += pitch; 5396 source3 += pitch; 5397 source4 += pitch; 5398 source5 += pitch; 5399 source6 += pitch; 5400 source7 += pitch; 5401 } 5402 } 5403 else if(internal.depth == 16) 5404 { 5405 for(int y = 0; y < height; y++) 5406 { 5407 for(int x = 0; x < 4 * width; x++) 5408 { 5409 float c0 = *(float*)(source0 + 4 * x); 5410 float c1 = *(float*)(source1 + 4 * x); 5411 float c2 = *(float*)(source2 + 4 * x); 5412 float c3 = *(float*)(source3 + 4 * x); 5413 float c4 = *(float*)(source4 + 4 * x); 5414 float c5 = *(float*)(source5 + 4 * x); 5415 float c6 = *(float*)(source6 + 4 * x); 5416 float c7 = *(float*)(source7 + 4 * x); 5417 float c8 = *(float*)(source8 + 4 * x); 5418 float c9 = *(float*)(source9 + 4 * x); 5419 float cA = *(float*)(sourceA + 4 * x); 5420 float cB = *(float*)(sourceB + 4 * x); 5421 float cC = *(float*)(sourceC + 4 * x); 5422 float cD = *(float*)(sourceD + 4 * x); 5423 float cE = *(float*)(sourceE + 4 * x); 5424 float cF = 
*(float*)(sourceF + 4 * x); 5425 5426 c0 = c0 + c1; 5427 c2 = c2 + c3; 5428 c4 = c4 + c5; 5429 c6 = c6 + c7; 5430 c8 = c8 + c9; 5431 cA = cA + cB; 5432 cC = cC + cD; 5433 cE = cE + cF; 5434 c0 = c0 + c2; 5435 c4 = c4 + c6; 5436 c8 = c8 + cA; 5437 cC = cC + cE; 5438 c0 = c0 + c4; 5439 c8 = c8 + cC; 5440 c0 = c0 + c8; 5441 c0 *= 1.0f / 16.0f; 5442 5443 *(float*)(source0 + 4 * x) = c0; 5444 } 5445 5446 source0 += pitch; 5447 source1 += pitch; 5448 source2 += pitch; 5449 source3 += pitch; 5450 source4 += pitch; 5451 source5 += pitch; 5452 source6 += pitch; 5453 source7 += pitch; 5454 source8 += pitch; 5455 source9 += pitch; 5456 sourceA += pitch; 5457 sourceB += pitch; 5458 sourceC += pitch; 5459 sourceD += pitch; 5460 sourceE += pitch; 5461 sourceF += pitch; 5462 } 5463 } 5464 else ASSERT(false); 5465 } 5466 } 5467 else if(internal.format == FORMAT_R5G6B5) 5468 { 5469 if(CPUID::supportsSSE2() && (width % 8) == 0) 5470 { 5471 if(internal.depth == 2) 5472 { 5473 for(int y = 0; y < height; y++) 5474 { 5475 for(int x = 0; x < width; x += 8) 5476 { 5477 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5478 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5479 5480 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5481 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5482 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5483 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5484 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5485 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5486 5487 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5488 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5489 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5490 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5491 c0 = _mm_or_si128(c0, c1); 5492 5493 
_mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5494 } 5495 5496 source0 += pitch; 5497 source1 += pitch; 5498 } 5499 } 5500 else if(internal.depth == 4) 5501 { 5502 for(int y = 0; y < height; y++) 5503 { 5504 for(int x = 0; x < width; x += 8) 5505 { 5506 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5507 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5508 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5509 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5510 5511 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5512 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5513 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5514 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5515 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5516 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5517 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5518 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5519 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5520 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5521 5522 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5523 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5524 c0 = _mm_avg_epu8(c0, c2); 5525 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5526 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5527 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5528 c1 = _mm_avg_epu16(c1, c3); 5529 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5530 c0 = _mm_or_si128(c0, c1); 5531 5532 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5533 } 5534 5535 source0 += pitch; 5536 source1 += pitch; 5537 source2 += pitch; 5538 source3 += pitch; 5539 } 5540 } 5541 else if(internal.depth == 8) 5542 { 5543 for(int y = 0; y < height; 
y++) 5544 { 5545 for(int x = 0; x < width; x += 8) 5546 { 5547 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5548 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5549 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5550 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5551 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5552 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5553 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5554 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5555 5556 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5557 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5558 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5559 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5560 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5561 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5562 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5563 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5564 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5565 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5566 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5567 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5568 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5569 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5570 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5571 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5572 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5573 __m128i c7__g_ = 
_mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5574 5575 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5576 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5577 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5578 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5579 c0 = _mm_avg_epu8(c0, c2); 5580 c4 = _mm_avg_epu8(c4, c6); 5581 c0 = _mm_avg_epu8(c0, c4); 5582 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5583 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5584 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5585 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5586 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5587 c1 = _mm_avg_epu16(c1, c3); 5588 c5 = _mm_avg_epu16(c5, c7); 5589 c1 = _mm_avg_epu16(c1, c5); 5590 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5591 c0 = _mm_or_si128(c0, c1); 5592 5593 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5594 } 5595 5596 source0 += pitch; 5597 source1 += pitch; 5598 source2 += pitch; 5599 source3 += pitch; 5600 source4 += pitch; 5601 source5 += pitch; 5602 source6 += pitch; 5603 source7 += pitch; 5604 } 5605 } 5606 else if(internal.depth == 16) 5607 { 5608 for(int y = 0; y < height; y++) 5609 { 5610 for(int x = 0; x < width; x += 8) 5611 { 5612 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5613 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5614 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5615 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5616 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5617 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5618 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5619 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5620 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x)); 5621 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x)); 5622 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x)); 5623 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x)); 5624 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x)); 5625 __m128i cD = 
_mm_load_si128((__m128i*)(sourceD + 2 * x)); 5626 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x)); 5627 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x)); 5628 5629 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5630 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5631 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5632 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5633 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5634 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5635 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5636 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5637 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5638 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5639 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5640 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5641 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5642 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5643 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5644 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5645 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5646 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5647 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b)); 5648 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_)); 5649 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b)); 5650 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_)); 5651 __m128i cA_r_b = 
_mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b)); 5652 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_)); 5653 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b)); 5654 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_)); 5655 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b)); 5656 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_)); 5657 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b)); 5658 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_)); 5659 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b)); 5660 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_)); 5661 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b)); 5662 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_)); 5663 5664 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5665 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5666 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5667 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5668 c8 = _mm_avg_epu8(c8_r_b, c9_r_b); 5669 cA = _mm_avg_epu8(cA_r_b, cB_r_b); 5670 cC = _mm_avg_epu8(cC_r_b, cD_r_b); 5671 cE = _mm_avg_epu8(cE_r_b, cF_r_b); 5672 c0 = _mm_avg_epu8(c0, c2); 5673 c4 = _mm_avg_epu8(c4, c6); 5674 c8 = _mm_avg_epu8(c8, cA); 5675 cC = _mm_avg_epu8(cC, cE); 5676 c0 = _mm_avg_epu8(c0, c4); 5677 c8 = _mm_avg_epu8(c8, cC); 5678 c0 = _mm_avg_epu8(c0, c8); 5679 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5680 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5681 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5682 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5683 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5684 c9 = _mm_avg_epu16(c8__g_, c9__g_); 5685 cB = _mm_avg_epu16(cA__g_, cB__g_); 5686 cD = _mm_avg_epu16(cC__g_, cD__g_); 5687 cF = _mm_avg_epu16(cE__g_, cF__g_); 5688 c1 = _mm_avg_epu8(c1, c3); 5689 c5 = _mm_avg_epu8(c5, c7); 5690 c9 = _mm_avg_epu8(c9, cB); 5691 cD = 
_mm_avg_epu8(cD, cF); 5692 c1 = _mm_avg_epu8(c1, c5); 5693 c9 = _mm_avg_epu8(c9, cD); 5694 c1 = _mm_avg_epu8(c1, c9); 5695 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5696 c0 = _mm_or_si128(c0, c1); 5697 5698 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5699 } 5700 5701 source0 += pitch; 5702 source1 += pitch; 5703 source2 += pitch; 5704 source3 += pitch; 5705 source4 += pitch; 5706 source5 += pitch; 5707 source6 += pitch; 5708 source7 += pitch; 5709 source8 += pitch; 5710 source9 += pitch; 5711 sourceA += pitch; 5712 sourceB += pitch; 5713 sourceC += pitch; 5714 sourceD += pitch; 5715 sourceE += pitch; 5716 sourceF += pitch; 5717 } 5718 } 5719 else ASSERT(false); 5720 } 5721 else 5722 { 5723 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821)) 5724 5725 if(internal.depth == 2) 5726 { 5727 for(int y = 0; y < height; y++) 5728 { 5729 for(int x = 0; x < width; x++) 5730 { 5731 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5732 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5733 5734 c0 = AVERAGE(c0, c1); 5735 5736 *(unsigned short*)(source0 + 2 * x) = c0; 5737 } 5738 5739 source0 += pitch; 5740 source1 += pitch; 5741 } 5742 } 5743 else if(internal.depth == 4) 5744 { 5745 for(int y = 0; y < height; y++) 5746 { 5747 for(int x = 0; x < width; x++) 5748 { 5749 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5750 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5751 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5752 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5753 5754 c0 = AVERAGE(c0, c1); 5755 c2 = AVERAGE(c2, c3); 5756 c0 = AVERAGE(c0, c2); 5757 5758 *(unsigned short*)(source0 + 2 * x) = c0; 5759 } 5760 5761 source0 += pitch; 5762 source1 += pitch; 5763 source2 += pitch; 5764 source3 += pitch; 5765 } 5766 } 5767 else if(internal.depth == 8) 5768 { 5769 for(int y = 0; y < height; y++) 5770 { 5771 for(int x = 0; x < width; x++) 5772 { 5773 
unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5774 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5775 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5776 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5777 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5778 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5779 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5780 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5781 5782 c0 = AVERAGE(c0, c1); 5783 c2 = AVERAGE(c2, c3); 5784 c4 = AVERAGE(c4, c5); 5785 c6 = AVERAGE(c6, c7); 5786 c0 = AVERAGE(c0, c2); 5787 c4 = AVERAGE(c4, c6); 5788 c0 = AVERAGE(c0, c4); 5789 5790 *(unsigned short*)(source0 + 2 * x) = c0; 5791 } 5792 5793 source0 += pitch; 5794 source1 += pitch; 5795 source2 += pitch; 5796 source3 += pitch; 5797 source4 += pitch; 5798 source5 += pitch; 5799 source6 += pitch; 5800 source7 += pitch; 5801 } 5802 } 5803 else if(internal.depth == 16) 5804 { 5805 for(int y = 0; y < height; y++) 5806 { 5807 for(int x = 0; x < width; x++) 5808 { 5809 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5810 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5811 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5812 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5813 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5814 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5815 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5816 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5817 unsigned short c8 = *(unsigned short*)(source8 + 2 * x); 5818 unsigned short c9 = *(unsigned short*)(source9 + 2 * x); 5819 unsigned short cA = *(unsigned short*)(sourceA + 2 * x); 5820 unsigned short cB = *(unsigned short*)(sourceB + 2 * x); 5821 unsigned short cC = *(unsigned short*)(sourceC + 2 * x); 5822 unsigned short cD = *(unsigned short*)(sourceD + 2 * x); 5823 unsigned short cE = *(unsigned short*)(sourceE + 2 
* x); 5824 unsigned short cF = *(unsigned short*)(sourceF + 2 * x); 5825 5826 c0 = AVERAGE(c0, c1); 5827 c2 = AVERAGE(c2, c3); 5828 c4 = AVERAGE(c4, c5); 5829 c6 = AVERAGE(c6, c7); 5830 c8 = AVERAGE(c8, c9); 5831 cA = AVERAGE(cA, cB); 5832 cC = AVERAGE(cC, cD); 5833 cE = AVERAGE(cE, cF); 5834 c0 = AVERAGE(c0, c2); 5835 c4 = AVERAGE(c4, c6); 5836 c8 = AVERAGE(c8, cA); 5837 cC = AVERAGE(cC, cE); 5838 c0 = AVERAGE(c0, c4); 5839 c8 = AVERAGE(c8, cC); 5840 c0 = AVERAGE(c0, c8); 5841 5842 *(unsigned short*)(source0 + 2 * x) = c0; 5843 } 5844 5845 source0 += pitch; 5846 source1 += pitch; 5847 source2 += pitch; 5848 source3 += pitch; 5849 source4 += pitch; 5850 source5 += pitch; 5851 source6 += pitch; 5852 source7 += pitch; 5853 source8 += pitch; 5854 source9 += pitch; 5855 sourceA += pitch; 5856 sourceB += pitch; 5857 sourceC += pitch; 5858 sourceD += pitch; 5859 sourceE += pitch; 5860 sourceF += pitch; 5861 } 5862 } 5863 else ASSERT(false); 5864 5865 #undef AVERAGE 5866 } 5867 } 5868 else 5869 { 5870 // UNIMPLEMENTED(); 5871 } 5872 } 5873} 5874