Surface.cpp revision dbd1a8e6cb781672840a5f7d53d162247a4dbe98
1// SwiftShader Software Renderer 2// 3// Copyright(c) 2005-2013 TransGaming Inc. 4// 5// All rights reserved. No part of this software may be copied, distributed, transmitted, 6// transcribed, stored in a retrieval system, translated into any human or computer 7// language by any means, or disclosed to third parties without the explicit written 8// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express 9// or implied, including but not limited to any patent rights, are granted to you. 10// 11 12#include "Surface.hpp" 13 14#include "Color.hpp" 15#include "Context.hpp" 16#include "ETC_Decoder.hpp" 17#include "Renderer.hpp" 18#include "Common/Half.hpp" 19#include "Common/Memory.hpp" 20#include "Common/CPUID.hpp" 21#include "Common/Resource.hpp" 22#include "Common/Debug.hpp" 23#include "Reactor/Reactor.hpp" 24 25#include <xmmintrin.h> 26#include <emmintrin.h> 27 28#undef min 29#undef max 30 31namespace sw 32{ 33 extern bool quadLayoutEnabled; 34 extern bool complementaryDepthBuffer; 35 extern TranscendentalPrecision logPrecision; 36 37 unsigned int *Surface::palette = 0; 38 unsigned int Surface::paletteID = 0; 39 40 void Rect::clip(int minX, int minY, int maxX, int maxY) 41 { 42 x0 = clamp(x0, minX, maxX); 43 y0 = clamp(y0, minY, maxY); 44 x1 = clamp(x1, minX, maxX); 45 y1 = clamp(y1, minY, maxY); 46 } 47 48 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color) 49 { 50 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 51 52 write(element, color); 53 } 54 55 void Surface::Buffer::write(int x, int y, const Color<float> &color) 56 { 57 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 58 59 write(element, color); 60 } 61 62 inline void Surface::Buffer::write(void *element, const Color<float> &color) 63 { 64 switch(format) 65 { 66 case FORMAT_A8: 67 *(unsigned char*)element = unorm<8>(color.a); 68 break; 69 case FORMAT_R8I_SNORM: 70 *(char*)element = snorm<8>(color.r); 71 break; 72 case FORMAT_R8: 73 *(unsigned char*)element = unorm<8>(color.r); 74 break; 75 case FORMAT_R8I: 76 *(char*)element = scast<8>(color.r); 77 break; 78 case FORMAT_R8UI: 79 *(unsigned char*)element = ucast<8>(color.r); 80 break; 81 case FORMAT_R16I: 82 *(short*)element = scast<16>(color.r); 83 break; 84 case FORMAT_R16UI: 85 *(unsigned short*)element = ucast<16>(color.r); 86 break; 87 case FORMAT_R32I: 88 *(int*)element = static_cast<int>(color.r); 89 break; 90 case FORMAT_R32UI: 91 *(unsigned int*)element = static_cast<unsigned int>(color.r); 92 break; 93 case FORMAT_R3G3B2: 94 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 95 break; 96 case FORMAT_A8R3G3B2: 97 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 98 break; 99 case FORMAT_X4R4G4B4: 100 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 101 break; 102 case FORMAT_A4R4G4B4: 103 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 104 break; 105 case FORMAT_R4G4B4A4: 106 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0); 107 break; 108 case FORMAT_R5G6B5: 109 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0); 110 break; 111 case FORMAT_A1R5G5B5: 112 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 113 break; 114 case FORMAT_R5G5B5A1: 115 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0); 116 break; 117 case FORMAT_X1R5G5B5: 118 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 119 break; 120 case FORMAT_A8R8G8B8: 121 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 122 break; 123 case FORMAT_X8R8G8B8: 124 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 125 break; 126 case FORMAT_A8B8G8R8I_SNORM: 127 *(unsigned int*)element = (static_cast<unsigned int>(snorm<8>(color.a)) << 24) | 128 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 129 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 130 (static_cast<unsigned int>(snorm<8>(color.r)) << 0); 131 break; 132 case FORMAT_A8B8G8R8: 133 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 134 break; 135 case FORMAT_A8B8G8R8I: 136 *(unsigned int*)element = (static_cast<unsigned int>(scast<8>(color.a)) << 24) | 137 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 138 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 139 (static_cast<unsigned int>(scast<8>(color.r)) << 0); 140 break; 141 case FORMAT_A8B8G8R8UI: 142 *(unsigned int*)element = (ucast<8>(color.a) << 24) | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 143 break; 144 case FORMAT_X8B8G8R8I_SNORM: 145 *(unsigned int*)element = 0x7F000000 | 146 (static_cast<unsigned int>(snorm<8>(color.b)) << 16) | 147 (static_cast<unsigned int>(snorm<8>(color.g)) << 8) | 148 (static_cast<unsigned int>(snorm<8>(color.r)) << 0); 149 break; 150 case FORMAT_X8B8G8R8: 151 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 152 break; 153 case FORMAT_X8B8G8R8I: 154 *(unsigned int*)element = 0x7F000000 | 155 (static_cast<unsigned int>(scast<8>(color.b)) << 16) | 156 (static_cast<unsigned int>(scast<8>(color.g)) << 8) | 157 (static_cast<unsigned int>(scast<8>(color.r)) << 0); 158 case FORMAT_X8B8G8R8UI: 159 *(unsigned int*)element = 0xFF000000 | (ucast<8>(color.b) << 16) | (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 160 break; 161 case FORMAT_A2R10G10B10: 162 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0); 163 break; 164 case FORMAT_A2B10G10R10: 165 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0); 166 break; 167 case FORMAT_G8R8I_SNORM: 168 *(unsigned short*)element = (static_cast<unsigned short>(snorm<8>(color.g)) << 8) | 169 (static_cast<unsigned short>(snorm<8>(color.r)) << 0); 170 break; 171 case FORMAT_G8R8: 172 *(unsigned short*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 173 break; 174 case FORMAT_G8R8I: 175 *(unsigned short*)element = (static_cast<unsigned short>(scast<8>(color.g)) << 8) | 176 (static_cast<unsigned short>(scast<8>(color.r)) << 0); 177 break; 178 case FORMAT_G8R8UI: 179 *(unsigned short*)element = (ucast<8>(color.g) << 8) | (ucast<8>(color.r) << 0); 180 break; 181 case FORMAT_G16R16: 182 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0); 183 break; 184 case FORMAT_G16R16I: 185 *(unsigned int*)element = (static_cast<unsigned int>(scast<16>(color.g)) << 16) | 186 (static_cast<unsigned int>(scast<16>(color.r)) << 0); 187 break; 188 case FORMAT_G16R16UI: 189 *(unsigned int*)element = (ucast<16>(color.g) << 16) | (ucast<16>(color.r) << 0); 190 break; 191 case FORMAT_G32R32I: 192 case FORMAT_G32R32UI: 193 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 194 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 195 break; 196 case FORMAT_A16B16G16R16: 197 ((unsigned short*)element)[0] = unorm<16>(color.r); 198 ((unsigned short*)element)[1] = unorm<16>(color.g); 199 ((unsigned short*)element)[2] = unorm<16>(color.b); 200 ((unsigned short*)element)[3] = unorm<16>(color.a); 201 break; 202 case FORMAT_A16B16G16R16I: 203 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 204 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 205 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 206 ((unsigned short*)element)[3] = static_cast<unsigned short>(scast<16>(color.a)); 207 break; 208 case FORMAT_A16B16G16R16UI: 209 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 210 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 211 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 212 ((unsigned short*)element)[3] = static_cast<unsigned short>(ucast<16>(color.a)); 213 break; 214 case FORMAT_X16B16G16R16I: 215 ((unsigned short*)element)[0] = static_cast<unsigned short>(scast<16>(color.r)); 216 ((unsigned short*)element)[1] = static_cast<unsigned short>(scast<16>(color.g)); 217 ((unsigned short*)element)[2] = static_cast<unsigned short>(scast<16>(color.b)); 218 break; 219 case FORMAT_X16B16G16R16UI: 220 ((unsigned short*)element)[0] = static_cast<unsigned short>(ucast<16>(color.r)); 221 ((unsigned short*)element)[1] = static_cast<unsigned short>(ucast<16>(color.g)); 222 ((unsigned short*)element)[2] = static_cast<unsigned short>(ucast<16>(color.b)); 223 break; 224 case FORMAT_A32B32G32R32I: 225 case FORMAT_A32B32G32R32UI: 226 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 227 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 228 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b); 229 ((unsigned int*)element)[3] = static_cast<unsigned int>(color.a); 230 break; 231 case FORMAT_X32B32G32R32I: 232 case FORMAT_X32B32G32R32UI: 233 ((unsigned int*)element)[0] = static_cast<unsigned int>(color.r); 234 ((unsigned int*)element)[1] = static_cast<unsigned int>(color.g); 235 ((unsigned int*)element)[2] = static_cast<unsigned int>(color.b); 236 break; 237 case FORMAT_V8U8: 238 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 239 break; 240 case FORMAT_L6V5U5: 241 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0); 242 break; 243 case FORMAT_Q8W8V8U8: 244 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 245 break; 246 case FORMAT_X8L8V8U8: 247 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 248 break; 249 case FORMAT_V16U16: 250 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0); 251 break; 252 case FORMAT_A2W10V10U10: 253 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0); 254 break; 255 case FORMAT_A16W16V16U16: 256 ((unsigned short*)element)[0] = snorm<16>(color.r); 257 ((unsigned short*)element)[1] = snorm<16>(color.g); 258 ((unsigned short*)element)[2] = snorm<16>(color.b); 259 ((unsigned short*)element)[3] = unorm<16>(color.a); 260 break; 261 case FORMAT_Q16W16V16U16: 262 ((unsigned short*)element)[0] = snorm<16>(color.r); 263 ((unsigned short*)element)[1] = snorm<16>(color.g); 264 ((unsigned short*)element)[2] = snorm<16>(color.b); 265 ((unsigned short*)element)[3] = snorm<16>(color.a); 266 break; 267 case FORMAT_R8G8B8: 268 ((unsigned char*)element)[0] = unorm<8>(color.b); 269 ((unsigned char*)element)[1] = unorm<8>(color.g); 270 ((unsigned char*)element)[2] = unorm<8>(color.r); 271 break; 272 case FORMAT_B8G8R8: 273 ((unsigned char*)element)[0] = unorm<8>(color.r); 274 ((unsigned char*)element)[1] = unorm<8>(color.g); 275 ((unsigned char*)element)[2] = unorm<8>(color.b); 276 break; 277 case FORMAT_R16F: 278 *(half*)element = (half)color.r; 279 break; 280 case FORMAT_A16F: 281 *(half*)element = (half)color.a; 282 break; 283 case FORMAT_G16R16F: 284 ((half*)element)[0] = (half)color.r; 285 ((half*)element)[1] = (half)color.g; 286 break; 287 case FORMAT_B16G16R16F: 288 ((half*)element)[0] = (half)color.r; 289 ((half*)element)[1] = (half)color.g; 290 ((half*)element)[2] = (half)color.b; 291 break; 292 case FORMAT_A16B16G16R16F: 293 ((half*)element)[0] = (half)color.r; 294 ((half*)element)[1] = (half)color.g; 295 ((half*)element)[2] = (half)color.b; 296 ((half*)element)[3] = (half)color.a; 297 break; 298 case FORMAT_A32F: 299 *(float*)element = color.a; 300 break; 301 case FORMAT_R32F: 302 *(float*)element = color.r; 303 break; 304 case FORMAT_G32R32F: 305 ((float*)element)[0] = color.r; 306 ((float*)element)[1] = color.g; 307 break; 308 case FORMAT_X32B32G32R32F: 309 ((float*)element)[3] = 1.0f; 310 case FORMAT_B32G32R32F: 311 ((float*)element)[0] = color.r; 312 ((float*)element)[1] = color.g; 313 ((float*)element)[2] = color.b; 314 break; 315 case FORMAT_A32B32G32R32F: 316 ((float*)element)[0] = color.r; 317 ((float*)element)[1] = color.g; 318 ((float*)element)[2] = color.b; 319 ((float*)element)[3] = color.a; 320 break; 321 case FORMAT_D32F: 322 case FORMAT_D32F_LOCKABLE: 323 case FORMAT_D32FS8_TEXTURE: 324 case FORMAT_D32FS8_SHADOW: 325 *((float*)element) = color.r; 326 break; 327 case FORMAT_D32F_COMPLEMENTARY: 328 *((float*)element) = 1 - color.r; 329 break; 330 case FORMAT_S8: 331 *((unsigned char*)element) = unorm<8>(color.r); 332 break; 333 case FORMAT_L8: 334 *(unsigned char*)element = unorm<8>(color.r); 335 break; 336 case FORMAT_A4L4: 337 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0); 338 break; 339 case FORMAT_L16: 340 *(unsigned short*)element = unorm<16>(color.r); 341 break; 342 case FORMAT_A8L8: 343 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0); 344 break; 345 case FORMAT_L16F: 346 *(half*)element = (half)color.r; 347 break; 348 case FORMAT_A16L16F: 349 ((half*)element)[0] = (half)color.r; 350 ((half*)element)[1] = (half)color.a; 351 break; 352 case FORMAT_L32F: 353 *(float*)element = color.r; 354 break; 355 case FORMAT_A32L32F: 356 ((float*)element)[0] = color.r; 357 ((float*)element)[1] = color.a; 358 break; 359 default: 360 ASSERT(false); 361 } 362 } 363 364 Color<float> Surface::Buffer::read(int x, int y, int z) const 365 { 366 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 367 368 return read(element); 369 } 370 371 Color<float> Surface::Buffer::read(int x, int y) const 372 { 373 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 374 375 return read(element); 376 } 377 378 inline Color<float> Surface::Buffer::read(void *element) const 379 { 380 float r = 0.0f; 381 float g = 0.0f; 382 float b = 0.0f; 383 float a = 1.0f; 384 385 switch(format) 386 { 387 case FORMAT_P8: 388 { 389 ASSERT(palette); 390 391 unsigned int abgr = palette[*(unsigned char*)element]; 392 393 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 394 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 395 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 396 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 397 } 398 break; 399 case FORMAT_A8P8: 400 { 401 ASSERT(palette); 402 403 unsigned int bgr = palette[((unsigned char*)element)[0]]; 404 405 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF); 406 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00); 407 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000); 408 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 409 } 410 break; 411 case FORMAT_A8: 412 r = 0; 413 g = 0; 414 b = 0; 415 a = *(unsigned char*)element * (1.0f / 0xFF); 416 break; 417 case FORMAT_R8I_SNORM: 418 r = max((*(signed char*)element) * (1.0f / 0x7F), -1.0f); 419 break; 420 case FORMAT_R8: 421 r = *(unsigned char*)element * (1.0f / 0xFF); 422 break; 423 case FORMAT_R8I: 424 r = *(signed char*)element; 425 break; 426 case FORMAT_R8UI: 427 r = *(unsigned char*)element; 428 break; 429 case FORMAT_R3G3B2: 430 { 431 unsigned char rgb = *(unsigned char*)element; 432 433 r = (rgb & 0xE0) * (1.0f / 0xE0); 434 g = (rgb & 0x1C) * (1.0f / 0x1C); 435 b = (rgb & 0x03) * (1.0f / 0x03); 436 } 437 break; 438 case FORMAT_A8R3G3B2: 439 { 440 unsigned short argb = *(unsigned short*)element; 441 442 a = (argb & 0xFF00) * (1.0f / 0xFF00); 443 r = (argb & 0x00E0) * (1.0f / 0x00E0); 444 g = (argb & 0x001C) * (1.0f / 0x001C); 445 b = (argb & 0x0003) * (1.0f / 0x0003); 446 } 447 break; 448 case FORMAT_X4R4G4B4: 449 { 450 unsigned short rgb = *(unsigned short*)element; 451 452 r = (rgb & 0x0F00) * (1.0f / 0x0F00); 453 g = (rgb & 0x00F0) * (1.0f / 0x00F0); 454 b = (rgb & 0x000F) * (1.0f / 0x000F); 455 } 456 break; 457 case FORMAT_A4R4G4B4: 458 { 459 unsigned short argb = *(unsigned short*)element; 460 461 a = (argb & 0xF000) * (1.0f / 0xF000); 462 r = (argb & 0x0F00) * (1.0f / 0x0F00); 463 g = (argb & 0x00F0) * (1.0f / 0x00F0); 464 b = (argb & 0x000F) * (1.0f / 0x000F); 465 } 466 break; 467 case FORMAT_R4G4B4A4: 468 { 469 unsigned short rgba = *(unsigned short*)element; 470 471 r = (rgba & 0xF000) * (1.0f / 0xF000); 472 g = (rgba & 0x0F00) * (1.0f / 0x0F00); 473 b = (rgba & 0x00F0) * (1.0f / 0x00F0); 474 a = (rgba & 0x000F) * (1.0f / 0x000F); 475 } 476 break; 477 case FORMAT_R5G6B5: 478 { 479 unsigned short rgb = *(unsigned short*)element; 480 481 r = (rgb & 0xF800) * (1.0f / 0xF800); 482 g = (rgb & 0x07E0) * (1.0f / 0x07E0); 483 b = (rgb & 0x001F) * (1.0f / 0x001F); 484 } 485 break; 486 case FORMAT_A1R5G5B5: 487 { 488 unsigned short argb = *(unsigned short*)element; 489 490 a = (argb & 0x8000) * (1.0f / 0x8000); 491 r = (argb & 0x7C00) * (1.0f / 0x7C00); 492 g = (argb & 0x03E0) * (1.0f / 0x03E0); 493 b = (argb & 0x001F) * (1.0f / 0x001F); 494 } 495 break; 496 case FORMAT_R5G5B5A1: 497 { 498 unsigned short rgba = *(unsigned short*)element; 499 500 r = (rgba & 0xF800) * (1.0f / 0xF800); 501 g = (rgba & 0x07C0) * (1.0f / 0x07C0); 502 b = (rgba & 0x003E) * (1.0f / 0x003E); 503 a = (rgba & 0x0001) * (1.0f / 0x0001); 504 } 505 break; 506 case FORMAT_X1R5G5B5: 507 { 508 unsigned short xrgb = *(unsigned short*)element; 509 510 r = (xrgb & 0x7C00) * (1.0f / 0x7C00); 511 g = (xrgb & 0x03E0) * (1.0f / 0x03E0); 512 b = (xrgb & 0x001F) * (1.0f / 0x001F); 513 } 514 break; 515 case FORMAT_A8R8G8B8: 516 { 517 unsigned int argb = *(unsigned int*)element; 518 519 a = (argb & 0xFF000000) * (1.0f / 0xFF000000); 520 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000); 521 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00); 522 b = (argb & 0x000000FF) * (1.0f / 0x000000FF); 523 } 524 break; 525 case FORMAT_X8R8G8B8: 526 { 527 unsigned int xrgb = *(unsigned int*)element; 528 529 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000); 530 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00); 531 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF); 532 } 533 break; 534 case FORMAT_A8B8G8R8I_SNORM: 535 { 536 signed char* abgr = (signed char*)element; 537 538 r = max(abgr[0] * (1.0f / 0x7F), -1.0f); 539 g = max(abgr[1] * (1.0f / 0x7F), -1.0f); 540 b = max(abgr[2] * (1.0f / 0x7F), -1.0f); 541 a = max(abgr[3] * (1.0f / 0x7F), -1.0f); 542 } 543 break; 544 case FORMAT_A8B8G8R8: 545 { 546 unsigned int abgr = *(unsigned int*)element; 547 548 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 549 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 550 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 551 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 552 } 553 break; 554 case FORMAT_A8B8G8R8I: 555 { 556 signed char* abgr = (signed char*)element; 557 558 r = abgr[0]; 559 g = abgr[1]; 560 b = abgr[2]; 561 a = abgr[3]; 562 } 563 break; 564 case FORMAT_A8B8G8R8UI: 565 { 566 unsigned char* abgr = (unsigned char*)element; 567 568 r = abgr[0]; 569 g = abgr[1]; 570 b = abgr[2]; 571 a = abgr[3]; 572 } 573 break; 574 case FORMAT_X8B8G8R8I_SNORM: 575 { 576 signed char* bgr = (signed char*)element; 577 578 r = max(bgr[0] * (1.0f / 0x7F), -1.0f); 579 g = max(bgr[1] * (1.0f / 0x7F), -1.0f); 580 b = max(bgr[2] * (1.0f / 0x7F), -1.0f); 581 } 582 break; 583 case FORMAT_X8B8G8R8: 584 { 585 unsigned int xbgr = *(unsigned int*)element; 586 587 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000); 588 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00); 589 r = (xbgr & 0x000000FF) * (1.0f / 0x000000FF); 590 } 591 break; 592 case FORMAT_X8B8G8R8I: 593 { 594 signed char* bgr = (signed char*)element; 595 596 r = bgr[0]; 597 g = bgr[1]; 598 b = bgr[2]; 599 } 600 break; 601 case FORMAT_X8B8G8R8UI: 602 { 603 unsigned char* bgr = (unsigned char*)element; 604 605 r = bgr[0]; 606 g = bgr[1]; 607 b = bgr[2]; 608 } 609 break; 610 case FORMAT_G8R8I_SNORM: 611 { 612 signed char* gr = (signed char*)element; 613 614 r = (gr[0] & 0xFF00) * (1.0f / 0xFF00); 615 g = (gr[1] & 0x00FF) * (1.0f / 0x00FF); 616 } 617 break; 618 case FORMAT_G8R8: 619 { 620 unsigned short gr = *(unsigned short*)element; 621 622 g = (gr & 0xFF00) * (1.0f / 0xFF00); 623 r = (gr & 0x00FF) * (1.0f / 0x00FF); 624 } 625 break; 626 case FORMAT_G8R8I: 627 { 628 signed char* gr = (signed char*)element; 629 630 r = gr[0]; 631 g = gr[1]; 632 } 633 break; 634 case FORMAT_G8R8UI: 635 { 636 unsigned char* gr = (unsigned char*)element; 637 638 r = gr[0]; 639 g = gr[1]; 640 } 641 break; 642 case FORMAT_R16I: 643 r = *((short*)element); 644 break; 645 case FORMAT_R16UI: 646 r = *((unsigned short*)element); 647 break; 648 case FORMAT_G16R16I: 649 { 650 short* gr = (short*)element; 651 652 r = gr[0]; 653 g = gr[1]; 654 } 655 break; 656 case FORMAT_G16R16: 657 { 658 unsigned int gr = *(unsigned int*)element; 659 660 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000); 661 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF); 662 } 663 break; 664 case FORMAT_G16R16UI: 665 { 666 unsigned short* gr = (unsigned short*)element; 667 668 r = gr[0]; 669 g = gr[1]; 670 } 671 break; 672 case FORMAT_A2R10G10B10: 673 { 674 unsigned int argb = *(unsigned int*)element; 675 676 a = (argb & 0xC0000000) * (1.0f / 0xC0000000); 677 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000); 678 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00); 679 b = (argb & 0x000003FF) * (1.0f / 0x000003FF); 680 } 681 break; 682 case FORMAT_A2B10G10R10: 683 { 684 unsigned int abgr = *(unsigned int*)element; 685 686 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000); 687 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000); 688 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00); 689 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF); 690 } 691 break; 692 case FORMAT_A16B16G16R16I: 693 { 694 short* abgr = (short*)element; 695 696 r = abgr[0]; 697 g = abgr[1]; 698 b = abgr[2]; 699 a = abgr[3]; 700 } 701 break; 702 case FORMAT_A16B16G16R16: 703 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF); 704 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF); 705 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF); 706 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 707 break; 708 case FORMAT_A16B16G16R16UI: 709 { 710 unsigned short* abgr = (unsigned short*)element; 711 712 r = abgr[0]; 713 g = abgr[1]; 714 b = abgr[2]; 715 a = abgr[3]; 716 } 717 break; 718 case FORMAT_X16B16G16R16I: 719 { 720 short* bgr = (short*)element; 721 722 r = bgr[0]; 723 g = bgr[1]; 724 b = bgr[2]; 725 } 726 break; 727 case FORMAT_X16B16G16R16UI: 728 { 729 unsigned short* bgr = (unsigned short*)element; 730 731 r = bgr[0]; 732 g = bgr[1]; 733 b = bgr[2]; 734 } 735 break; 736 case FORMAT_A32B32G32R32I: 737 { 738 int* abgr = (int*)element; 739 740 r = static_cast<float>(abgr[0]); 741 g = static_cast<float>(abgr[1]); 742 b = static_cast<float>(abgr[2]); 743 a = static_cast<float>(abgr[3]); 744 } 745 break; 746 case FORMAT_A32B32G32R32UI: 747 { 748 unsigned int* abgr = (unsigned int*)element; 749 750 r = static_cast<float>(abgr[0]); 751 g = static_cast<float>(abgr[1]); 752 b = static_cast<float>(abgr[2]); 753 a = static_cast<float>(abgr[3]); 754 } 755 break; 756 case FORMAT_X32B32G32R32I: 757 { 758 int* bgr = (int*)element; 759 760 r = static_cast<float>(bgr[0]); 761 g = static_cast<float>(bgr[1]); 762 b = static_cast<float>(bgr[2]); 763 } 764 break; 765 case FORMAT_X32B32G32R32UI: 766 { 767 unsigned int* bgr = (unsigned int*)element; 768 769 r = static_cast<float>(bgr[0]); 770 g = static_cast<float>(bgr[1]); 771 b = static_cast<float>(bgr[2]); 772 } 773 break; 774 case FORMAT_G32R32I: 775 { 776 int* gr = (int*)element; 777 778 r = static_cast<float>(gr[0]); 779 g = static_cast<float>(gr[1]); 780 } 781 break; 782 case FORMAT_G32R32UI: 783 { 784 unsigned int* gr = (unsigned int*)element; 785 786 r = static_cast<float>(gr[0]); 787 g = static_cast<float>(gr[1]); 788 } 789 break; 790 case FORMAT_R32I: 791 r = static_cast<float>(*((int*)element)); 792 break; 793 case FORMAT_R32UI: 794 r = static_cast<float>(*((unsigned int*)element)); 795 break; 796 case FORMAT_V8U8: 797 { 798 unsigned short vu = *(unsigned short*)element; 799 800 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000); 801 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000); 802 } 803 break; 804 case FORMAT_L6V5U5: 805 { 806 unsigned short lvu = *(unsigned short*)element; 807 808 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000); 809 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000); 810 b = (lvu & 0xFC00) * (1.0f / 0xFC00); 811 } 812 break; 813 case FORMAT_Q8W8V8U8: 814 { 815 unsigned int qwvu = *(unsigned int*)element; 816 817 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 818 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 819 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000); 820 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000); 821 } 822 break; 823 case FORMAT_X8L8V8U8: 824 { 825 unsigned int xlvu = *(unsigned int*)element; 826 827 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 828 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 829 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000); 830 } 831 break; 832 case FORMAT_R8G8B8: 833 r = ((unsigned char*)element)[2] * (1.0f / 0xFF); 834 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 835 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 836 break; 837 case FORMAT_B8G8R8: 838 r = ((unsigned char*)element)[0] * (1.0f / 0xFF); 839 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 840 b = ((unsigned char*)element)[2] * (1.0f / 0xFF); 841 break; 842 case FORMAT_V16U16: 843 { 844 unsigned int vu = *(unsigned int*)element; 845 846 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000); 847 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000); 848 } 849 break; 850 case FORMAT_A2W10V10U10: 851 { 852 unsigned int awvu = *(unsigned int*)element; 853 854 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000); 855 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000); 856 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000); 857 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000); 858 } 859 break; 860 case FORMAT_A16W16V16U16: 861 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 862 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 863 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 864 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 865 break; 866 case FORMAT_Q16W16V16U16: 867 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 868 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 869 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 870 a = ((signed short*)element)[3] * (1.0f / 0x7FFF); 871 break; 872 case FORMAT_L8: 873 r = 874 g = 875 b = *(unsigned char*)element * (1.0f / 0xFF); 876 break; 877 case FORMAT_A4L4: 878 { 879 unsigned char al = *(unsigned char*)element; 880 881 r = 882 g = 883 b = (al & 0x0F) * (1.0f / 0x0F); 884 a = (al & 0xF0) * (1.0f / 0xF0); 885 } 886 break; 887 case FORMAT_L16: 888 r = 889 g = 890 b = *(unsigned short*)element * (1.0f / 0xFFFF); 891 break; 892 case FORMAT_A8L8: 893 r = 894 g = 895 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 896 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 897 break; 898 case FORMAT_L16F: 899 r = 900 g = 901 b = *(half*)element; 902 break; 903 case FORMAT_A16L16F: 904 r = 905 g = 906 b = ((half*)element)[0]; 907 a = ((half*)element)[1]; 908 break; 909 case FORMAT_L32F: 910 r = 911 g = 912 b = *(float*)element; 913 break; 914 case FORMAT_A32L32F: 915 r = 916 g = 917 b = ((float*)element)[0]; 918 a = ((float*)element)[1]; 919 break; 920 case FORMAT_A16F: 921 a = *(half*)element; 922 break; 923 case FORMAT_R16F: 924 r = *(half*)element; 925 break; 926 case FORMAT_G16R16F: 927 r = ((half*)element)[0]; 928 g = ((half*)element)[1]; 929 break; 930 case FORMAT_B16G16R16F: 931 r = ((half*)element)[0]; 932 g = ((half*)element)[1]; 933 b = ((half*)element)[2]; 934 break; 935 case FORMAT_A16B16G16R16F: 936 r = ((half*)element)[0]; 937 g = ((half*)element)[1]; 938 b = ((half*)element)[2]; 939 a = ((half*)element)[3]; 940 break; 941 case FORMAT_A32F: 942 a = *(float*)element; 943 break; 944 case FORMAT_R32F: 945 r = *(float*)element; 946 break; 947 case FORMAT_G32R32F: 948 r = ((float*)element)[0]; 949 g = ((float*)element)[1]; 950 break; 951 case FORMAT_X32B32G32R32F: 952 case FORMAT_B32G32R32F: 953 r = ((float*)element)[0]; 954 g = ((float*)element)[1]; 955 b = ((float*)element)[2]; 956 break; 957 case FORMAT_A32B32G32R32F: 958 r = ((float*)element)[0]; 959 g = ((float*)element)[1]; 960 b = ((float*)element)[2]; 961 a = ((float*)element)[3]; 962 break; 963 case FORMAT_D32F: 964 case FORMAT_D32F_LOCKABLE: 965 case FORMAT_D32FS8_TEXTURE: 966 case FORMAT_D32FS8_SHADOW: 967 r = *(float*)element; 968 g = r; 969 b = r; 970 a = r; 971 break; 972 case FORMAT_D32F_COMPLEMENTARY: 973 r = 1.0f - *(float*)element; 974 g = r; 975 b = r; 976 a = r; 977 break; 978 case FORMAT_S8: 979 r = *(unsigned char*)element * (1.0f / 0xFF); 980 break; 981 default: 982 ASSERT(false); 983 } 984 985 // if(sRGB) 986 // { 987 // r = sRGBtoLinear(r); 988 // g = sRGBtoLinear(g); 989 // b = sRGBtoLinear(b); 990 // } 991 992 return Color<float>(r, g, b, a); 993 } 994 995 Color<float> Surface::Buffer::sample(float x, float y, float z) const 996 { 997 x -= 0.5f; 998 y -= 0.5f; 999 z -= 0.5f; 1000 1001 int x0 = clamp((int)x, 0, width - 1); 1002 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 1003 1004 int y0 = clamp((int)y, 0, height - 1); 1005 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1; 1006 1007 int z0 = clamp((int)z, 0, depth - 1); 1008 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1; 1009 1010 Color<float> c000 = read(x0, y0, z0); 1011 Color<float> c100 = read(x1, y0, z0); 1012 Color<float> c010 = read(x0, y1, z0); 1013 Color<float> c110 = read(x1, y1, z0); 1014 Color<float> c001 = read(x0, y0, z1); 1015 Color<float> c101 = read(x1, y0, z1); 1016 Color<float> c011 = read(x0, y1, z1); 1017 Color<float> c111 = read(x1, y1, z1); 1018 1019 float fx = x - x0; 1020 float fy = y - y0; 1021 float fz = z - z0; 1022 1023 c000 *= (1 - fx) * (1 - fy) * (1 - fz); 1024 c100 *= fx * (1 - fy) * (1 - fz); 1025 c010 *= (1 - fx) * fy * (1 - fz); 1026 c110 *= fx * fy * (1 - fz); 1027 c001 *= (1 - fx) * (1 - fy) * fz; 1028 c101 *= fx * (1 - fy) * fz; 1029 c011 *= (1 - fx) * fy * fz; 1030 c111 *= fx * fy * fz; 1031 1032 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111; 1033 } 1034 1035 Color<float> Surface::Buffer::sample(float x, float y) const 1036 { 1037 x -= 0.5f; 1038 y -= 0.5f; 1039 1040 int x0 = clamp((int)x, 0, width - 1); 1041 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 1042 1043 int y0 = clamp((int)y, 0, height - 1); 1044 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1; 1045 1046 Color<float> c00 = read(x0, y0); 1047 Color<float> c10 = read(x1, y0); 1048 Color<float> c01 = read(x0, y1); 1049 Color<float> c11 = read(x1, y1); 1050 1051 float fx = x - x0; 1052 float fy = y - y0; 1053 1054 c00 *= (1 - fx) * (1 - fy); 1055 c10 *= fx * (1 - fy); 1056 c01 *= (1 - fx) * fy; 1057 c11 *= fx * fy; 1058 1059 return c00 + c10 + c01 + c11; 1060 } 1061 1062 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock) 1063 { 1064 this->lock = lock; 1065 1066 switch(lock) 1067 { 1068 case LOCK_UNLOCKED: 1069 case LOCK_READONLY: 1070 break; 1071 case LOCK_WRITEONLY: 1072 case LOCK_READWRITE: 1073 case LOCK_DISCARD: 1074 dirty = true; 1075 break; 1076 default: 1077 ASSERT(false); 1078 } 1079 1080 if(buffer) 1081 { 1082 switch(format) 1083 { 1084 #if S3TC_SUPPORT 1085 case FORMAT_DXT1: 1086 #endif 1087 case FORMAT_ATI1: 1088 case FORMAT_ETC1: 1089 case FORMAT_R11_EAC: 1090 case FORMAT_SIGNED_R11_EAC: 1091 case FORMAT_RGB8_ETC2: 1092 case FORMAT_SRGB8_ETC2: 1093 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1094 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1095 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1096 case FORMAT_RG11_EAC: 1097 case FORMAT_SIGNED_RG11_EAC: 1098 case FORMAT_RGBA8_ETC2_EAC: 1099 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1100 case FORMAT_RGBA_ASTC_4x4_KHR: 1101 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1102 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1103 case FORMAT_RGBA_ASTC_5x4_KHR: 1104 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1105 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB; 1106 case FORMAT_RGBA_ASTC_5x5_KHR: 1107 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1108 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB; 1109 case FORMAT_RGBA_ASTC_6x5_KHR: 1110 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1111 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB; 1112 case FORMAT_RGBA_ASTC_6x6_KHR: 1113 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1114 return (unsigned char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB; 1115 case FORMAT_RGBA_ASTC_8x5_KHR: 1116 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1117 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB; 1118 case FORMAT_RGBA_ASTC_8x6_KHR: 1119 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1120 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB; 1121 case FORMAT_RGBA_ASTC_8x8_KHR: 1122 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1123 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB; 1124 case FORMAT_RGBA_ASTC_10x5_KHR: 1125 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1126 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB; 1127 case FORMAT_RGBA_ASTC_10x6_KHR: 1128 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1129 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB; 1130 case FORMAT_RGBA_ASTC_10x8_KHR: 1131 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1132 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB; 1133 case FORMAT_RGBA_ASTC_10x10_KHR: 1134 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1135 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB; 1136 case FORMAT_RGBA_ASTC_12x10_KHR: 1137 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1138 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB; 1139 case FORMAT_RGBA_ASTC_12x12_KHR: 1140 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1141 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB; 1142 #if S3TC_SUPPORT 1143 case FORMAT_DXT3: 1144 case FORMAT_DXT5: 1145 #endif 1146 case FORMAT_ATI2: 1147 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 1148 default: 1149 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 1150 } 1151 } 1152 1153 return 0; 1154 } 1155 1156 void Surface::Buffer::unlockRect() 1157 { 1158 lock = LOCK_UNLOCKED; 1159 } 1160 1161 Surface::Surface(int width, int height, int depth, Format format, void *pixels, int pitch, int slice) : lockable(true), renderTarget(false) 1162 { 1163 resource = new Resource(0); 1164 hasParent = false; 1165 ownExternal = false; 1166 depth = max(1, depth); 1167 1168 external.buffer = pixels; 1169 external.width = width; 1170 external.height = height; 1171 external.depth = depth; 1172 external.format = format; 1173 external.bytes = bytes(external.format); 1174 external.pitchB = pitch; 1175 external.pitchP = external.bytes ? pitch / external.bytes : 0; 1176 external.sliceB = slice; 1177 external.sliceP = external.bytes ? slice / external.bytes : 0; 1178 external.lock = LOCK_UNLOCKED; 1179 external.dirty = true; 1180 1181 internal.buffer = 0; 1182 internal.width = width; 1183 internal.height = height; 1184 internal.depth = depth; 1185 internal.format = selectInternalFormat(format); 1186 internal.bytes = bytes(internal.format); 1187 internal.pitchB = pitchB(internal.width, internal.format, false); 1188 internal.pitchP = pitchP(internal.width, internal.format, false); 1189 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false); 1190 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false); 1191 internal.lock = LOCK_UNLOCKED; 1192 internal.dirty = false; 1193 1194 stencil.buffer = 0; 1195 stencil.width = width; 1196 stencil.height = height; 1197 stencil.depth = depth; 1198 stencil.format = FORMAT_S8; 1199 stencil.bytes = bytes(stencil.format); 1200 stencil.pitchB = pitchB(stencil.width, stencil.format, false); 1201 stencil.pitchP = pitchP(stencil.width, stencil.format, false); 1202 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false); 1203 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false); 1204 stencil.lock = LOCK_UNLOCKED; 1205 stencil.dirty = false; 1206 1207 dirtyMipmaps = true; 1208 paletteUsed = 0; 1209 } 1210 1211 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget, int pitchPprovided) : lockable(lockable), renderTarget(renderTarget) 1212 { 1213 resource = texture ? texture : new Resource(0); 1214 hasParent = texture != 0; 1215 ownExternal = true; 1216 depth = max(1, depth); 1217 1218 external.buffer = 0; 1219 external.width = width; 1220 external.height = height; 1221 external.depth = depth; 1222 external.format = format; 1223 external.bytes = bytes(external.format); 1224 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture); 1225 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture); 1226 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture); 1227 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture); 1228 external.lock = LOCK_UNLOCKED; 1229 external.dirty = false; 1230 1231 internal.buffer = 0; 1232 internal.width = width; 1233 internal.height = height; 1234 internal.depth = depth; 1235 internal.format = selectInternalFormat(format); 1236 internal.bytes = bytes(internal.format); 1237 internal.pitchB = !pitchPprovided ? pitchB(internal.width, internal.format, renderTarget) : pitchPprovided * internal.bytes; 1238 internal.pitchP = !pitchPprovided ? pitchP(internal.width, internal.format, renderTarget) : pitchPprovided; 1239 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget); 1240 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget); 1241 internal.lock = LOCK_UNLOCKED; 1242 internal.dirty = false; 1243 1244 stencil.buffer = 0; 1245 stencil.width = width; 1246 stencil.height = height; 1247 stencil.depth = depth; 1248 stencil.format = FORMAT_S8; 1249 stencil.bytes = bytes(stencil.format); 1250 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget); 1251 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget); 1252 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget); 1253 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget); 1254 stencil.lock = LOCK_UNLOCKED; 1255 stencil.dirty = false; 1256 1257 dirtyMipmaps = true; 1258 paletteUsed = 0; 1259 } 1260 1261 Surface::~Surface() 1262 { 1263 // Synchronize so we can deallocate the buffers below 1264 resource->lock(DESTRUCT); 1265 resource->unlock(); 1266 1267 if(!hasParent) 1268 { 1269 resource->destruct(); 1270 } 1271 1272 if(ownExternal) 1273 { 1274 deallocate(external.buffer); 1275 } 1276 1277 if(internal.buffer != external.buffer) 1278 { 1279 deallocate(internal.buffer); 1280 } 1281 1282 deallocate(stencil.buffer); 1283 1284 external.buffer = 0; 1285 internal.buffer = 0; 1286 stencil.buffer = 0; 1287 } 1288 1289 void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client) 1290 { 1291 resource->lock(client); 1292 1293 if(!external.buffer) 1294 { 1295 if(internal.buffer && identicalFormats()) 1296 { 1297 external.buffer = internal.buffer; 1298 } 1299 else 1300 { 1301 external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format); 1302 } 1303 } 1304 1305 if(internal.dirty) 1306 { 1307 if(lock != LOCK_DISCARD) 1308 { 1309 update(external, internal); 1310 } 1311 1312 internal.dirty = false; 1313 } 1314 1315 switch(lock) 1316 { 1317 case LOCK_READONLY: 1318 break; 1319 case LOCK_WRITEONLY: 1320 case LOCK_READWRITE: 1321 case LOCK_DISCARD: 1322 dirtyMipmaps = true; 1323 break; 1324 default: 1325 ASSERT(false); 1326 } 1327 1328 return external.lockRect(x, y, z, lock); 1329 } 1330 1331 void Surface::unlockExternal() 1332 { 1333 resource->unlock(); 1334 1335 external.unlockRect(); 1336 } 1337 1338 void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client) 1339 { 1340 if(lock != LOCK_UNLOCKED) 1341 { 1342 resource->lock(client); 1343 } 1344 1345 if(!internal.buffer) 1346 { 1347 if(external.buffer && identicalFormats()) 1348 { 1349 internal.buffer = external.buffer; 1350 } 1351 else 1352 { 1353 internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format); 1354 } 1355 } 1356 1357 // FIXME: WHQL requires conversion to lower external precision and back 1358 if(logPrecision >= WHQL) 1359 { 1360 if(internal.dirty && renderTarget && internal.format != external.format) 1361 { 1362 if(lock != LOCK_DISCARD) 1363 { 1364 switch(external.format) 1365 { 1366 case FORMAT_R3G3B2: 1367 case FORMAT_A8R3G3B2: 1368 case FORMAT_A1R5G5B5: 1369 case FORMAT_A2R10G10B10: 1370 case FORMAT_A2B10G10R10: 1371 lockExternal(0, 0, 0, LOCK_READWRITE, client); 1372 unlockExternal(); 1373 break; 1374 default: 1375 // Difference passes WHQL 1376 break; 1377 } 1378 } 1379 } 1380 } 1381 1382 if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID)) 1383 { 1384 if(lock != LOCK_DISCARD) 1385 { 1386 update(internal, external); 1387 } 1388 1389 external.dirty = false; 1390 paletteUsed = Surface::paletteID; 1391 } 1392 1393 switch(lock) 1394 { 1395 case LOCK_UNLOCKED: 1396 case LOCK_READONLY: 1397 break; 1398 case LOCK_WRITEONLY: 1399 case LOCK_READWRITE: 1400 case LOCK_DISCARD: 1401 dirtyMipmaps = true; 1402 break; 1403 default: 1404 ASSERT(false); 1405 } 1406 1407 if(lock == LOCK_READONLY && client == PUBLIC) 1408 { 1409 resolve(); 1410 } 1411 1412 return internal.lockRect(x, y, z, lock); 1413 } 1414 1415 void Surface::unlockInternal() 1416 { 1417 resource->unlock(); 1418 1419 internal.unlockRect(); 1420 } 1421 1422 void *Surface::lockStencil(int front, Accessor client) 1423 { 1424 resource->lock(client); 1425 1426 if(!stencil.buffer) 1427 { 1428 stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format); 1429 } 1430 1431 return stencil.lockRect(0, 0, front, LOCK_READWRITE); // FIXME 1432 } 1433 1434 void Surface::unlockStencil() 1435 { 1436 resource->unlock(); 1437 1438 stencil.unlockRect(); 1439 } 1440 1441 int Surface::bytes(Format format) 1442 { 1443 switch(format) 1444 { 1445 case FORMAT_NULL: return 0; 1446 case FORMAT_P8: return 1; 1447 case FORMAT_A8P8: return 2; 1448 case FORMAT_A8: return 1; 1449 case FORMAT_R8I: return 1; 1450 case FORMAT_R8: return 1; 1451 case FORMAT_R3G3B2: return 1; 1452 case FORMAT_R16I: return 2; 1453 case FORMAT_R16UI: return 2; 1454 case FORMAT_A8R3G3B2: return 2; 1455 case FORMAT_R5G6B5: return 2; 1456 case FORMAT_A1R5G5B5: return 2; 1457 case FORMAT_X1R5G5B5: return 2; 1458 case FORMAT_R5G5B5A1: return 2; 1459 case FORMAT_X4R4G4B4: return 2; 1460 case FORMAT_A4R4G4B4: return 2; 1461 case FORMAT_R4G4B4A4: return 2; 1462 case FORMAT_R8G8B8: return 3; 1463 case FORMAT_B8G8R8: return 3; 1464 case FORMAT_R32I: return 4; 1465 case FORMAT_R32UI: return 4; 1466 case FORMAT_X8R8G8B8: return 4; 1467 // case FORMAT_X8G8R8B8Q: return 4; 1468 case FORMAT_A8R8G8B8: return 4; 1469 // case FORMAT_A8G8R8B8Q: return 4; 1470 case FORMAT_X8B8G8R8I: return 4; 1471 case FORMAT_X8B8G8R8: return 4; 1472 case FORMAT_A8B8G8R8I: return 4; 1473 case FORMAT_R8UI: return 1; 1474 case FORMAT_G8R8UI: return 2; 1475 case FORMAT_X8B8G8R8UI: return 4; 1476 case FORMAT_A8B8G8R8UI: return 4; 1477 case FORMAT_A8B8G8R8: return 4; 1478 case FORMAT_R8I_SNORM: return 1; 1479 case FORMAT_G8R8I_SNORM: return 2; 1480 case FORMAT_X8B8G8R8I_SNORM: return 4; 1481 case FORMAT_A8B8G8R8I_SNORM: return 4; 1482 case FORMAT_A2R10G10B10: return 4; 1483 case FORMAT_A2B10G10R10: return 4; 1484 case FORMAT_G8R8I: return 2; 1485 case FORMAT_G8R8: return 2; 1486 case FORMAT_G16R16I: return 4; 1487 case FORMAT_G16R16UI: return 4; 1488 case FORMAT_G16R16: return 4; 1489 case FORMAT_G32R32I: return 8; 1490 case FORMAT_G32R32UI: return 8; 1491 case FORMAT_X16B16G16R16I: return 8; 1492 case FORMAT_X16B16G16R16UI: return 8; 1493 case FORMAT_A16B16G16R16I: return 8; 1494 case FORMAT_A16B16G16R16UI: return 8; 1495 case FORMAT_A16B16G16R16: return 8; 1496 case FORMAT_X32B32G32R32I: return 16; 1497 case FORMAT_X32B32G32R32UI: return 16; 1498 case FORMAT_A32B32G32R32I: return 16; 1499 case FORMAT_A32B32G32R32UI: return 16; 1500 // Compressed formats 1501 #if S3TC_SUPPORT 1502 case FORMAT_DXT1: return 2; // Column of four pixels 1503 case FORMAT_DXT3: return 4; // Column of four pixels 1504 case FORMAT_DXT5: return 4; // Column of four pixels 1505 #endif 1506 case FORMAT_ATI1: return 2; // Column of four pixels 1507 case FORMAT_ATI2: return 4; // Column of four pixels 1508 case FORMAT_ETC1: return 2; // Column of four pixels 1509 case FORMAT_R11_EAC: return 2; 1510 case FORMAT_SIGNED_R11_EAC: return 2; 1511 case FORMAT_RG11_EAC: return 4; 1512 case FORMAT_SIGNED_RG11_EAC: return 4; 1513 case FORMAT_RGB8_ETC2: return 2; 1514 case FORMAT_SRGB8_ETC2: return 2; 1515 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2; 1516 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2; 1517 case FORMAT_RGBA8_ETC2_EAC: return 4; 1518 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4; 1519 case FORMAT_RGBA_ASTC_4x4_KHR: 1520 case FORMAT_RGBA_ASTC_5x4_KHR: 1521 case FORMAT_RGBA_ASTC_5x5_KHR: 1522 case FORMAT_RGBA_ASTC_6x5_KHR: 1523 case FORMAT_RGBA_ASTC_6x6_KHR: 1524 case FORMAT_RGBA_ASTC_8x5_KHR: 1525 case FORMAT_RGBA_ASTC_8x6_KHR: 1526 case FORMAT_RGBA_ASTC_8x8_KHR: 1527 case FORMAT_RGBA_ASTC_10x5_KHR: 1528 case FORMAT_RGBA_ASTC_10x6_KHR: 1529 case FORMAT_RGBA_ASTC_10x8_KHR: 1530 case FORMAT_RGBA_ASTC_10x10_KHR: 1531 case FORMAT_RGBA_ASTC_12x10_KHR: 1532 case FORMAT_RGBA_ASTC_12x12_KHR: 1533 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1534 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1535 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1536 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1537 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1538 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1539 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1540 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1541 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1542 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1543 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1544 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1545 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1546 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME 1547 // Bumpmap formats 1548 case FORMAT_V8U8: return 2; 1549 case FORMAT_L6V5U5: return 2; 1550 case FORMAT_Q8W8V8U8: return 4; 1551 case FORMAT_X8L8V8U8: return 4; 1552 case FORMAT_A2W10V10U10: return 4; 1553 case FORMAT_V16U16: return 4; 1554 case FORMAT_A16W16V16U16: return 8; 1555 case FORMAT_Q16W16V16U16: return 8; 1556 // Luminance formats 1557 case FORMAT_L8: return 1; 1558 case FORMAT_A4L4: return 1; 1559 case FORMAT_L16: return 2; 1560 case FORMAT_A8L8: return 2; 1561 case FORMAT_L16F: return 2; 1562 case FORMAT_A16L16F: return 4; 1563 case FORMAT_L32F: return 4; 1564 case FORMAT_A32L32F: return 8; 1565 // Floating-point formats 1566 case FORMAT_A16F: return 2; 1567 case FORMAT_R16F: return 2; 1568 case FORMAT_G16R16F: return 4; 1569 case FORMAT_B16G16R16F: return 6; 1570 case FORMAT_A16B16G16R16F: return 8; 1571 case FORMAT_A32F: return 4; 1572 case FORMAT_R32F: return 4; 1573 case FORMAT_G32R32F: return 8; 1574 case FORMAT_B32G32R32F: return 12; 1575 case FORMAT_X32B32G32R32F: return 16; 1576 case FORMAT_A32B32G32R32F: return 16; 1577 // Depth/stencil formats 1578 case FORMAT_D16: return 2; 1579 case FORMAT_D32: return 4; 1580 case FORMAT_D24X8: return 4; 1581 case FORMAT_D24S8: return 4; 1582 case FORMAT_D24FS8: return 4; 1583 case FORMAT_D32F: return 4; 1584 case FORMAT_D32F_COMPLEMENTARY: return 4; 1585 case FORMAT_D32F_LOCKABLE: return 4; 1586 case FORMAT_D32FS8_TEXTURE: return 4; 1587 case FORMAT_D32FS8_SHADOW: return 4; 1588 case FORMAT_DF24S8: return 4; 1589 case FORMAT_DF16S8: return 2; 1590 case FORMAT_INTZ: return 4; 1591 case FORMAT_S8: return 1; 1592 case FORMAT_YV12_BT601: return 1; // Y plane only 1593 case FORMAT_YV12_BT709: return 1; // Y plane only 1594 case FORMAT_YV12_JFIF: return 1; // Y plane only 1595 default: 1596 ASSERT(false); 1597 } 1598 1599 return 0; 1600 } 1601 1602 int Surface::pitchB(int width, Format format, bool target) 1603 { 1604 if(target || isDepth(format) || isStencil(format)) 1605 { 1606 width = align(width, 2); 1607 } 1608 1609 switch(format) 1610 { 1611 #if S3TC_SUPPORT 1612 case FORMAT_DXT1: 1613 #endif 1614 case FORMAT_ETC1: 1615 case FORMAT_R11_EAC: 1616 case FORMAT_SIGNED_R11_EAC: 1617 case FORMAT_RGB8_ETC2: 1618 case FORMAT_SRGB8_ETC2: 1619 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1620 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1621 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows 1622 case FORMAT_RG11_EAC: 1623 case FORMAT_SIGNED_RG11_EAC: 1624 case FORMAT_RGBA8_ETC2_EAC: 1625 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1626 case FORMAT_RGBA_ASTC_4x4_KHR: 1627 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1628 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows 1629 case FORMAT_RGBA_ASTC_5x4_KHR: 1630 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1631 case FORMAT_RGBA_ASTC_5x5_KHR: 1632 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1633 return 16 * ((width + 4) / 5); 1634 case FORMAT_RGBA_ASTC_6x5_KHR: 1635 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1636 case FORMAT_RGBA_ASTC_6x6_KHR: 1637 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1638 return 16 * ((width + 5) / 6); 1639 case FORMAT_RGBA_ASTC_8x5_KHR: 1640 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1641 case FORMAT_RGBA_ASTC_8x6_KHR: 1642 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1643 case FORMAT_RGBA_ASTC_8x8_KHR: 1644 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1645 return 16 * ((width + 7) / 8); 1646 case FORMAT_RGBA_ASTC_10x5_KHR: 1647 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1648 case FORMAT_RGBA_ASTC_10x6_KHR: 1649 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1650 case FORMAT_RGBA_ASTC_10x8_KHR: 1651 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1652 case FORMAT_RGBA_ASTC_10x10_KHR: 1653 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1654 return 16 * ((width + 9) / 10); 1655 case FORMAT_RGBA_ASTC_12x10_KHR: 1656 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1657 case FORMAT_RGBA_ASTC_12x12_KHR: 1658 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1659 return 16 * ((width + 11) / 12); 1660 #if S3TC_SUPPORT 1661 case FORMAT_DXT3: 1662 case FORMAT_DXT5: 1663 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows 1664 #endif 1665 case FORMAT_ATI1: 1666 return 2 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per row 1667 case FORMAT_ATI2: 1668 return 4 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per row 1669 case FORMAT_YV12_BT601: 1670 case FORMAT_YV12_BT709: 1671 case FORMAT_YV12_JFIF: 1672 return align(width, 16); 1673 default: 1674 return bytes(format) * width; 1675 } 1676 } 1677 1678 int Surface::pitchP(int width, Format format, bool target) 1679 { 1680 int B = bytes(format); 1681 1682 return B > 0 ? pitchB(width, format, target) / B : 0; 1683 } 1684 1685 int Surface::sliceB(int width, int height, Format format, bool target) 1686 { 1687 if(target || isDepth(format) || isStencil(format)) 1688 { 1689 height = ((height + 1) & ~1); 1690 } 1691 1692 switch(format) 1693 { 1694 #if S3TC_SUPPORT 1695 case FORMAT_DXT1: 1696 case FORMAT_DXT3: 1697 case FORMAT_DXT5: 1698 #endif 1699 case FORMAT_ETC1: 1700 case FORMAT_R11_EAC: 1701 case FORMAT_SIGNED_R11_EAC: 1702 case FORMAT_RG11_EAC: 1703 case FORMAT_SIGNED_RG11_EAC: 1704 case FORMAT_RGB8_ETC2: 1705 case FORMAT_SRGB8_ETC2: 1706 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1707 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1708 case FORMAT_RGBA8_ETC2_EAC: 1709 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1710 case FORMAT_RGBA_ASTC_4x4_KHR: 1711 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1712 case FORMAT_RGBA_ASTC_5x4_KHR: 1713 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1714 return pitchB(width, format, target) * ((height + 3) / 4); // Pitch computed per 4 rows 1715 case FORMAT_RGBA_ASTC_5x5_KHR: 1716 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1717 case FORMAT_RGBA_ASTC_6x5_KHR: 1718 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1719 case FORMAT_RGBA_ASTC_8x5_KHR: 1720 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1721 case FORMAT_RGBA_ASTC_10x5_KHR: 1722 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1723 return pitchB(width, format, target) * ((height + 4) / 5); // Pitch computed per 5 rows 1724 case FORMAT_RGBA_ASTC_6x6_KHR: 1725 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1726 case FORMAT_RGBA_ASTC_8x6_KHR: 1727 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1728 case FORMAT_RGBA_ASTC_10x6_KHR: 1729 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1730 return pitchB(width, format, target) * ((height + 5) / 6); // Pitch computed per 6 rows 1731 case FORMAT_RGBA_ASTC_8x8_KHR: 1732 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1733 case FORMAT_RGBA_ASTC_10x8_KHR: 1734 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1735 return pitchB(width, format, target) * ((height + 7) / 8); // Pitch computed per 8 rows 1736 case FORMAT_RGBA_ASTC_10x10_KHR: 1737 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1738 case FORMAT_RGBA_ASTC_12x10_KHR: 1739 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1740 return pitchB(width, format, target) * ((height + 9) / 10); // Pitch computed per 10 rows 1741 case FORMAT_RGBA_ASTC_12x12_KHR: 1742 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 1743 return pitchB(width, format, target) * ((height + 11) / 12); // Pitch computed per 12 rows 1744 case FORMAT_ATI1: 1745 case FORMAT_ATI2: 1746 default: 1747 return pitchB(width, format, target) * height; // Pitch computed per row 1748 } 1749 } 1750 1751 int Surface::sliceP(int width, int height, Format format, bool target) 1752 { 1753 int B = bytes(format); 1754 1755 return B > 0 ? sliceB(width, height, format, target) / B : 0; 1756 } 1757 1758 void Surface::update(Buffer &destination, Buffer &source) 1759 { 1760 // ASSERT(source.lock != LOCK_UNLOCKED); 1761 // ASSERT(destination.lock != LOCK_UNLOCKED); 1762 1763 if(destination.buffer != source.buffer) 1764 { 1765 ASSERT(source.dirty && !destination.dirty); 1766 1767 switch(source.format) 1768 { 1769 case FORMAT_R8G8B8: decodeR8G8B8(destination, source); break; // FIXME: Check destination format 1770 case FORMAT_X1R5G5B5: decodeX1R5G5B5(destination, source); break; // FIXME: Check destination format 1771 case FORMAT_A1R5G5B5: decodeA1R5G5B5(destination, source); break; // FIXME: Check destination format 1772 case FORMAT_X4R4G4B4: decodeX4R4G4B4(destination, source); break; // FIXME: Check destination format 1773 case FORMAT_A4R4G4B4: decodeA4R4G4B4(destination, source); break; // FIXME: Check destination format 1774 case FORMAT_P8: decodeP8(destination, source); break; // FIXME: Check destination format 1775 #if S3TC_SUPPORT 1776 case FORMAT_DXT1: decodeDXT1(destination, source); break; // FIXME: Check destination format 1777 case FORMAT_DXT3: decodeDXT3(destination, source); break; // FIXME: Check destination format 1778 case FORMAT_DXT5: decodeDXT5(destination, source); break; // FIXME: Check destination format 1779 #endif 1780 case FORMAT_ATI1: decodeATI1(destination, source); break; // FIXME: Check destination format 1781 case FORMAT_ATI2: decodeATI2(destination, source); break; // FIXME: Check destination format 1782 case FORMAT_R11_EAC: decodeEAC(destination, source, 1, false); break; // FIXME: Check destination format 1783 case FORMAT_SIGNED_R11_EAC: decodeEAC(destination, source, 1, true); break; // FIXME: Check destination format 1784 case FORMAT_RG11_EAC: decodeEAC(destination, source, 2, false); break; // FIXME: Check destination format 1785 case FORMAT_SIGNED_RG11_EAC: decodeEAC(destination, source, 2, true); break; // FIXME: Check destination format 1786 case FORMAT_ETC1: 1787 case FORMAT_RGB8_ETC2: decodeETC2(destination, source, 0, false); break; // FIXME: Check destination format 1788 case FORMAT_SRGB8_ETC2: decodeETC2(destination, source, 0, true); break; // FIXME: Check destination format 1789 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, false); break; // FIXME: Check destination format 1790 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: decodeETC2(destination, source, 1, true); break; // FIXME: Check destination format 1791 case FORMAT_RGBA8_ETC2_EAC: decodeETC2(destination, source, 8, false); break; // FIXME: Check destination format 1792 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: decodeETC2(destination, source, 8, true); break; // FIXME: Check destination format 1793 case FORMAT_RGBA_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, false); break; // FIXME: Check destination format 1794 case FORMAT_RGBA_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, false); break; // FIXME: Check destination format 1795 case FORMAT_RGBA_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, false); break; // FIXME: Check destination format 1796 case FORMAT_RGBA_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, false); break; // FIXME: Check destination format 1797 case FORMAT_RGBA_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, false); break; // FIXME: Check destination format 1798 case FORMAT_RGBA_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, false); break; // FIXME: Check destination format 1799 case FORMAT_RGBA_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, false); break; // FIXME: Check destination format 1800 case FORMAT_RGBA_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, false); break; // FIXME: Check destination format 1801 case FORMAT_RGBA_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, false); break; // FIXME: Check destination format 1802 case FORMAT_RGBA_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, false); break; // FIXME: Check destination format 1803 case FORMAT_RGBA_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, false); break; // FIXME: Check destination format 1804 case FORMAT_RGBA_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, false); break; // FIXME: Check destination format 1805 case FORMAT_RGBA_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, false); break; // FIXME: Check destination format 1806 case FORMAT_RGBA_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, false); break; // FIXME: Check destination format 1807 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: decodeASTC(destination, source, 4, 4, 1, true); break; // FIXME: Check destination format 1808 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: decodeASTC(destination, source, 5, 4, 1, true); break; // FIXME: Check destination format 1809 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: decodeASTC(destination, source, 5, 5, 1, true); break; // FIXME: Check destination format 1810 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: decodeASTC(destination, source, 6, 5, 1, true); break; // FIXME: Check destination format 1811 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: decodeASTC(destination, source, 6, 6, 1, true); break; // FIXME: Check destination format 1812 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: decodeASTC(destination, source, 8, 5, 1, true); break; // FIXME: Check destination format 1813 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: decodeASTC(destination, source, 8, 6, 1, true); break; // FIXME: Check destination format 1814 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: decodeASTC(destination, source, 8, 8, 1, true); break; // FIXME: Check destination format 1815 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: decodeASTC(destination, source, 10, 5, 1, true); break; // FIXME: Check destination format 1816 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: decodeASTC(destination, source, 10, 6, 1, true); break; // FIXME: Check destination format 1817 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: decodeASTC(destination, source, 10, 8, 1, true); break; // FIXME: Check destination format 1818 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: decodeASTC(destination, source, 10, 10, 1, true); break; // FIXME: Check destination format 1819 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: decodeASTC(destination, source, 12, 10, 1, true); break; // FIXME: Check destination format 1820 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: decodeASTC(destination, source, 12, 12, 1, true); break; // FIXME: Check destination format 1821 default: genericUpdate(destination, source); break; 1822 } 1823 } 1824 } 1825 1826 void Surface::genericUpdate(Buffer &destination, Buffer &source) 1827 { 1828 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1829 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1830 1831 int depth = min(destination.depth, source.depth); 1832 int height = min(destination.height, source.height); 1833 int width = min(destination.width, source.width); 1834 int rowBytes = width * source.bytes; 1835 1836 for(int z = 0; z < depth; z++) 1837 { 1838 unsigned char *sourceRow = sourceSlice; 1839 unsigned char *destinationRow = destinationSlice; 1840 1841 for(int y = 0; y < height; y++) 1842 { 1843 if(source.format == destination.format) 1844 { 1845 memcpy(destinationRow, sourceRow, rowBytes); 1846 } 1847 else 1848 { 1849 unsigned char *sourceElement = sourceRow; 1850 unsigned char *destinationElement = destinationRow; 1851 1852 for(int x = 0; x < width; x++) 1853 { 1854 Color<float> color = source.read(sourceElement); 1855 destination.write(destinationElement, color); 1856 1857 sourceElement += source.bytes; 1858 destinationElement += destination.bytes; 1859 } 1860 } 1861 1862 sourceRow += source.pitchB; 1863 destinationRow += destination.pitchB; 1864 } 1865 1866 sourceSlice += source.sliceB; 1867 destinationSlice += destination.sliceB; 1868 } 1869 } 1870 1871 void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source) 1872 { 1873 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1874 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1875 1876 for(int z = 0; z < destination.depth && z < source.depth; z++) 1877 { 1878 unsigned char *sourceRow = sourceSlice; 1879 unsigned char *destinationRow = destinationSlice; 1880 1881 for(int y = 0; y < destination.height && y < source.height; y++) 1882 { 1883 unsigned char *sourceElement = sourceRow; 1884 unsigned char *destinationElement = destinationRow; 1885 1886 for(int x = 0; x < destination.width && x < source.width; x++) 1887 { 1888 unsigned int b = sourceElement[0]; 1889 unsigned int g = sourceElement[1]; 1890 unsigned int r = sourceElement[2]; 1891 1892 *(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0); 1893 1894 sourceElement += source.bytes; 1895 destinationElement += destination.bytes; 1896 } 1897 1898 sourceRow += source.pitchB; 1899 destinationRow += destination.pitchB; 1900 } 1901 1902 sourceSlice += source.sliceB; 1903 destinationSlice += destination.sliceB; 1904 } 1905 } 1906 1907 void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source) 1908 { 1909 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1910 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1911 1912 for(int z = 0; z < destination.depth && z < source.depth; z++) 1913 { 1914 unsigned char *sourceRow = sourceSlice; 1915 unsigned char *destinationRow = destinationSlice; 1916 1917 for(int y = 0; y < destination.height && y < source.height; y++) 1918 { 1919 unsigned char *sourceElement = sourceRow; 1920 unsigned char *destinationElement = destinationRow; 1921 1922 for(int x = 0; x < destination.width && x < source.width; x++) 1923 { 1924 unsigned int xrgb = *(unsigned short*)sourceElement; 1925 1926 unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1927 unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1928 unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8); 1929 1930 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 1931 1932 sourceElement += source.bytes; 1933 destinationElement += destination.bytes; 1934 } 1935 1936 sourceRow += source.pitchB; 1937 destinationRow += destination.pitchB; 1938 } 1939 1940 sourceSlice += source.sliceB; 1941 destinationSlice += destination.sliceB; 1942 } 1943 } 1944 1945 void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source) 1946 { 1947 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1948 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1949 1950 for(int z = 0; z < destination.depth && z < source.depth; z++) 1951 { 1952 unsigned char *sourceRow = sourceSlice; 1953 unsigned char *destinationRow = destinationSlice; 1954 1955 for(int y = 0; y < destination.height && y < source.height; y++) 1956 { 1957 unsigned char *sourceElement = sourceRow; 1958 unsigned char *destinationElement = destinationRow; 1959 1960 for(int x = 0; x < destination.width && x < source.width; x++) 1961 { 1962 unsigned int argb = *(unsigned short*)sourceElement; 1963 1964 unsigned int a = (argb & 0x8000) * 130560; 1965 unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000; 1966 unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00; 1967 unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8); 1968 1969 *(unsigned int*)destinationElement = a | r | g | b; 1970 1971 sourceElement += source.bytes; 1972 destinationElement += destination.bytes; 1973 } 1974 1975 sourceRow += source.pitchB; 1976 destinationRow += destination.pitchB; 1977 } 1978 1979 sourceSlice += source.sliceB; 1980 destinationSlice += destination.sliceB; 1981 } 1982 } 1983 1984 void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source) 1985 { 1986 unsigned char *sourceSlice = (unsigned char*)source.buffer; 1987 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 1988 1989 for(int z = 0; z < destination.depth && z < source.depth; z++) 1990 { 1991 unsigned char *sourceRow = sourceSlice; 1992 unsigned char *destinationRow = destinationSlice; 1993 1994 for(int y = 0; y < destination.height && y < source.height; y++) 1995 { 1996 unsigned char *sourceElement = sourceRow; 1997 unsigned char *destinationElement = destinationRow; 1998 1999 for(int x = 0; x < destination.width && x < source.width; x++) 2000 { 2001 unsigned int xrgb = *(unsigned short*)sourceElement; 2002 2003 unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000; 2004 unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00; 2005 unsigned int b = (xrgb & 0x000F) * 0x00000011; 2006 2007 *(unsigned int*)destinationElement = 0xFF000000 | r | g | b; 2008 2009 sourceElement += source.bytes; 2010 destinationElement += destination.bytes; 2011 } 2012 2013 sourceRow += source.pitchB; 2014 destinationRow += destination.pitchB; 2015 } 2016 2017 sourceSlice += source.sliceB; 2018 destinationSlice += destination.sliceB; 2019 } 2020 } 2021 2022 void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source) 2023 { 2024 unsigned char *sourceSlice = (unsigned char*)source.buffer; 2025 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 2026 2027 for(int z = 0; z < destination.depth && z < source.depth; z++) 2028 { 2029 unsigned char *sourceRow = sourceSlice; 2030 unsigned char *destinationRow = destinationSlice; 2031 2032 for(int y = 0; y < destination.height && y < source.height; y++) 2033 { 2034 unsigned char *sourceElement = sourceRow; 2035 unsigned char *destinationElement = destinationRow; 2036 2037 for(int x = 0; x < destination.width && x < source.width; x++) 2038 { 2039 unsigned int argb = *(unsigned short*)sourceElement; 2040 2041 unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000; 2042 unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000; 2043 unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00; 2044 unsigned int b = (argb & 0x000F) * 0x00000011; 2045 2046 *(unsigned int*)destinationElement = a | r | g | b; 2047 2048 sourceElement += source.bytes; 2049 destinationElement += destination.bytes; 2050 } 2051 2052 sourceRow += source.pitchB; 2053 destinationRow += destination.pitchB; 2054 } 2055 2056 sourceSlice += source.sliceB; 2057 destinationSlice += destination.sliceB; 2058 } 2059 } 2060 2061 void Surface::decodeP8(Buffer &destination, const Buffer &source) 2062 { 2063 unsigned char *sourceSlice = (unsigned char*)source.buffer; 2064 unsigned char *destinationSlice = (unsigned char*)destination.buffer; 2065 2066 for(int z = 0; z < destination.depth && z < source.depth; z++) 2067 { 2068 unsigned char *sourceRow = sourceSlice; 2069 unsigned char *destinationRow = destinationSlice; 2070 2071 for(int y = 0; y < destination.height && y < source.height; y++) 2072 { 2073 unsigned char *sourceElement = sourceRow; 2074 unsigned char *destinationElement = destinationRow; 2075 2076 for(int x = 0; x < destination.width && x < source.width; x++) 2077 { 2078 unsigned int abgr = palette[*(unsigned char*)sourceElement]; 2079 2080 unsigned int r = (abgr & 0x000000FF) << 16; 2081 unsigned int g = (abgr & 0x0000FF00) << 0; 2082 unsigned int b = (abgr & 0x00FF0000) >> 16; 2083 unsigned int a = (abgr & 0xFF000000) >> 0; 2084 2085 *(unsigned int*)destinationElement = a | r | g | b; 2086 2087 sourceElement += source.bytes; 2088 destinationElement += destination.bytes; 2089 } 2090 2091 sourceRow += source.pitchB; 2092 destinationRow += destination.pitchB; 2093 } 2094 2095 sourceSlice += source.sliceB; 2096 destinationSlice += destination.sliceB; 2097 } 2098 } 2099 2100#if S3TC_SUPPORT 2101 void Surface::decodeDXT1(Buffer &internal, const Buffer &external) 2102 { 2103 unsigned int *destSlice = (unsigned int*)internal.buffer; 2104 const DXT1 *source = (const DXT1*)external.buffer; 2105 2106 for(int z = 0; z < external.depth; z++) 2107 { 2108 unsigned int *dest = destSlice; 2109 2110 for(int y = 0; y < external.height; y += 4) 2111 { 2112 for(int x = 0; x < external.width; x += 4) 2113 { 2114 Color<byte> c[4]; 2115 2116 c[0] = source->c0; 2117 c[1] = source->c1; 2118 2119 if(source->c0 > source->c1) // No transparency 2120 { 2121 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2122 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2123 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2124 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2125 c[2].a = 0xFF; 2126 2127 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2128 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2129 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2130 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2131 c[3].a = 0xFF; 2132 } 2133 else // c3 transparent 2134 { 2135 // c2 = 1 / 2 * c0 + 1 / 2 * c1 2136 c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2); 2137 c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2); 2138 c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2); 2139 c[2].a = 0xFF; 2140 2141 c[3].r = 0; 2142 c[3].g = 0; 2143 c[3].b = 0; 2144 c[3].a = 0; 2145 } 2146 2147 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2148 { 2149 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2150 { 2151 dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4]; 2152 } 2153 } 2154 2155 source++; 2156 } 2157 } 2158 2159 (byte*&)destSlice += internal.sliceB; 2160 } 2161 } 2162 2163 void Surface::decodeDXT3(Buffer &internal, const Buffer &external) 2164 { 2165 unsigned int *destSlice = (unsigned int*)internal.buffer; 2166 const DXT3 *source = (const DXT3*)external.buffer; 2167 2168 for(int z = 0; z < external.depth; z++) 2169 { 2170 unsigned int *dest = destSlice; 2171 2172 for(int y = 0; y < external.height; y += 4) 2173 { 2174 for(int x = 0; x < external.width; x += 4) 2175 { 2176 Color<byte> c[4]; 2177 2178 c[0] = source->c0; 2179 c[1] = source->c1; 2180 2181 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2182 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2183 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2184 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2185 2186 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2187 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2188 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2189 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2190 2191 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2192 { 2193 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2194 { 2195 unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F; 2196 unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24)); 2197 2198 dest[(x + i) + (y + j) * internal.width] = color; 2199 } 2200 } 2201 2202 source++; 2203 } 2204 } 2205 2206 (byte*&)destSlice += internal.sliceB; 2207 } 2208 } 2209 2210 void Surface::decodeDXT5(Buffer &internal, const Buffer &external) 2211 { 2212 unsigned int *destSlice = (unsigned int*)internal.buffer; 2213 const DXT5 *source = (const DXT5*)external.buffer; 2214 2215 for(int z = 0; z < external.depth; z++) 2216 { 2217 unsigned int *dest = destSlice; 2218 2219 for(int y = 0; y < external.height; y += 4) 2220 { 2221 for(int x = 0; x < external.width; x += 4) 2222 { 2223 Color<byte> c[4]; 2224 2225 c[0] = source->c0; 2226 c[1] = source->c1; 2227 2228 // c2 = 2 / 3 * c0 + 1 / 3 * c1 2229 c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3); 2230 c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3); 2231 c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3); 2232 2233 // c3 = 1 / 3 * c0 + 2 / 3 * c1 2234 c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3); 2235 c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3); 2236 c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3); 2237 2238 byte a[8]; 2239 2240 a[0] = source->a0; 2241 a[1] = source->a1; 2242 2243 if(a[0] > a[1]) 2244 { 2245 a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7); 2246 a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7); 2247 a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7); 2248 a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7); 2249 a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7); 2250 a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7); 2251 } 2252 else 2253 { 2254 a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5); 2255 a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5); 2256 a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5); 2257 a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5); 2258 a[6] = 0; 2259 a[7] = 0xFF; 2260 } 2261 2262 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2263 { 2264 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2265 { 2266 unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24; 2267 unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha; 2268 2269 dest[(x + i) + (y + j) * internal.width] = color; 2270 } 2271 } 2272 2273 source++; 2274 } 2275 } 2276 2277 (byte*&)destSlice += internal.sliceB; 2278 } 2279 } 2280#endif 2281 2282 void Surface::decodeATI1(Buffer &internal, const Buffer &external) 2283 { 2284 byte *destSlice = (byte*)internal.buffer; 2285 const ATI1 *source = (const ATI1*)external.buffer; 2286 2287 for(int z = 0; z < external.depth; z++) 2288 { 2289 byte *dest = destSlice; 2290 2291 for(int y = 0; y < external.height; y += 4) 2292 { 2293 for(int x = 0; x < external.width; x += 4) 2294 { 2295 byte r[8]; 2296 2297 r[0] = source->r0; 2298 r[1] = source->r1; 2299 2300 if(r[0] > r[1]) 2301 { 2302 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 2303 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 2304 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 2305 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 2306 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 2307 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 2308 } 2309 else 2310 { 2311 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 2312 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 2313 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 2314 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 2315 r[6] = 0; 2316 r[7] = 0xFF; 2317 } 2318 2319 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2320 { 2321 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2322 { 2323 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 2324 } 2325 } 2326 2327 source++; 2328 } 2329 } 2330 2331 destSlice += internal.sliceB; 2332 } 2333 } 2334 2335 void Surface::decodeATI2(Buffer &internal, const Buffer &external) 2336 { 2337 word *destSlice = (word*)internal.buffer; 2338 const ATI2 *source = (const ATI2*)external.buffer; 2339 2340 for(int z = 0; z < external.depth; z++) 2341 { 2342 word *dest = destSlice; 2343 2344 for(int y = 0; y < external.height; y += 4) 2345 { 2346 for(int x = 0; x < external.width; x += 4) 2347 { 2348 byte X[8]; 2349 2350 X[0] = source->x0; 2351 X[1] = source->x1; 2352 2353 if(X[0] > X[1]) 2354 { 2355 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2356 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 2357 X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7); 2358 X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7); 2359 X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7); 2360 X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7); 2361 } 2362 else 2363 { 2364 X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5); 2365 X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5); 2366 X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5); 2367 X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5); 2368 X[6] = 0; 2369 X[7] = 0xFF; 2370 } 2371 2372 byte Y[8]; 2373 2374 Y[0] = source->y0; 2375 Y[1] = source->y1; 2376 2377 if(Y[0] > Y[1]) 2378 { 2379 Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7); 2380 Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7); 2381 Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7); 2382 Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7); 2383 Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7); 2384 Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7); 2385 } 2386 else 2387 { 2388 Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5); 2389 Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5); 2390 Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5); 2391 Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5); 2392 Y[6] = 0; 2393 Y[7] = 0xFF; 2394 } 2395 2396 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 2397 { 2398 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 2399 { 2400 word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8]; 2401 word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8]; 2402 2403 dest[(x + i) + (y + j) * internal.width] = (g << 8) + r; 2404 } 2405 } 2406 2407 source++; 2408 } 2409 } 2410 2411 (byte*&)destSlice += internal.sliceB; 2412 } 2413 } 2414 2415 void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB) 2416 { 2417 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2418 (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB)); 2419 2420 if(isSRGB) 2421 { 2422 static byte sRGBtoLinearTable[256]; 2423 static bool sRGBtoLinearTableDirty = true; 2424 if(sRGBtoLinearTableDirty) 2425 { 2426 for(int i = 0; i < 256; i++) 2427 { 2428 sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f); 2429 } 2430 sRGBtoLinearTableDirty = false; 2431 } 2432 2433 // Perform sRGB conversion in place after decoding 2434 byte* src = (byte*)internal.buffer; 2435 for(int y = 0; y < internal.height; y++) 2436 { 2437 byte* srcRow = src + y * internal.pitchB; 2438 for(int x = 0; x < internal.width; x++) 2439 { 2440 byte* srcPix = srcRow + x * internal.bytes; 2441 for(int i = 0; i < 3; i++) 2442 { 2443 srcPix[i] = sRGBtoLinearTable[srcPix[i]]; 2444 } 2445 } 2446 } 2447 } 2448 } 2449 2450 void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned) 2451 { 2452 ASSERT(nbChannels == 1 || nbChannels == 2); 2453 2454 ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes, 2455 (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ? ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED)); 2456 2457 // FIXME: We convert signed data to float, until signed integer internal formats are supported 2458 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats 2459 if(isSigned) 2460 { 2461 sbyte* src = (sbyte*)internal.buffer; 2462 2463 for(int y = 0; y < internal.height; y++) 2464 { 2465 sbyte* srcRow = src + y * internal.pitchB; 2466 for(int x = internal.width - 1; x >= 0; x--) 2467 { 2468 int dx = x & 0xFFFFFFFC; 2469 int mx = x - dx; 2470 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels; 2471 float* dstPix = (float*)(srcRow + x * internal.bytes); 2472 for(int c = nbChannels - 1; c >= 0; c--) 2473 { 2474 static const float normalization = 1.0f / 127.875f; 2475 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f); 2476 } 2477 } 2478 } 2479 } 2480 } 2481 2482 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB) 2483 { 2484 } 2485 2486 unsigned int Surface::size(int width, int height, int depth, Format format) 2487 { 2488 // Dimensions rounded up to multiples of 4, used for compressed formats 2489 int width4 = align(width, 4); 2490 int height4 = align(height, 4); 2491 2492 switch(format) 2493 { 2494 #if S3TC_SUPPORT 2495 case FORMAT_DXT1: 2496 #endif 2497 case FORMAT_ATI1: 2498 case FORMAT_ETC1: 2499 case FORMAT_R11_EAC: 2500 case FORMAT_SIGNED_R11_EAC: 2501 case FORMAT_RGB8_ETC2: 2502 case FORMAT_SRGB8_ETC2: 2503 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2504 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2505 return width4 * height4 * depth / 2; 2506 #if S3TC_SUPPORT 2507 case FORMAT_DXT3: 2508 case FORMAT_DXT5: 2509 #endif 2510 case FORMAT_ATI2: 2511 case FORMAT_RG11_EAC: 2512 case FORMAT_SIGNED_RG11_EAC: 2513 case FORMAT_RGBA8_ETC2_EAC: 2514 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2515 case FORMAT_RGBA_ASTC_4x4_KHR: 2516 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2517 return width4 * height4 * depth; 2518 case FORMAT_RGBA_ASTC_5x4_KHR: 2519 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2520 return align(width, 5) * height4 * depth; 2521 case FORMAT_RGBA_ASTC_5x5_KHR: 2522 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2523 return align(width, 5) * align(height, 5) * depth; 2524 case FORMAT_RGBA_ASTC_6x5_KHR: 2525 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2526 return align(width, 6) * align(height, 5) * depth; 2527 case FORMAT_RGBA_ASTC_6x6_KHR: 2528 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2529 return align(width, 6) * align(height, 6) * depth; 2530 case FORMAT_RGBA_ASTC_8x5_KHR: 2531 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2532 return align(width, 8) * align(height, 5) * depth; 2533 case FORMAT_RGBA_ASTC_8x6_KHR: 2534 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2535 return align(width, 8) * align(height, 6) * depth; 2536 case FORMAT_RGBA_ASTC_8x8_KHR: 2537 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2538 return align(width, 8) * align(height, 8) * depth; 2539 case FORMAT_RGBA_ASTC_10x5_KHR: 2540 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2541 return align(width, 10) * align(height, 5) * depth; 2542 case FORMAT_RGBA_ASTC_10x6_KHR: 2543 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2544 return align(width, 10) * align(height, 6) * depth; 2545 case FORMAT_RGBA_ASTC_10x8_KHR: 2546 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2547 return align(width, 10) * align(height, 8) * depth; 2548 case FORMAT_RGBA_ASTC_10x10_KHR: 2549 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2550 return align(width, 10) * align(height, 10) * depth; 2551 case FORMAT_RGBA_ASTC_12x10_KHR: 2552 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2553 return align(width, 12) * align(height, 10) * depth; 2554 case FORMAT_RGBA_ASTC_12x12_KHR: 2555 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2556 return align(width, 12) * align(height, 12) * depth; 2557 case FORMAT_YV12_BT601: 2558 case FORMAT_YV12_BT709: 2559 case FORMAT_YV12_JFIF: 2560 { 2561 unsigned int YStride = align(width, 16); 2562 unsigned int YSize = YStride * height; 2563 unsigned int CStride = align(YStride / 2, 16); 2564 unsigned int CSize = CStride * height / 2; 2565 2566 return YSize + 2 * CSize; 2567 } 2568 default: 2569 return bytes(format) * width * height * depth; 2570 } 2571 2572 return 0; 2573 } 2574 2575 bool Surface::isStencil(Format format) 2576 { 2577 switch(format) 2578 { 2579 case FORMAT_D32: 2580 case FORMAT_D16: 2581 case FORMAT_D24X8: 2582 case FORMAT_D32F: 2583 case FORMAT_D32F_COMPLEMENTARY: 2584 case FORMAT_D32F_LOCKABLE: 2585 return false; 2586 case FORMAT_D24S8: 2587 case FORMAT_D24FS8: 2588 case FORMAT_S8: 2589 case FORMAT_DF24S8: 2590 case FORMAT_DF16S8: 2591 case FORMAT_D32FS8_TEXTURE: 2592 case FORMAT_D32FS8_SHADOW: 2593 case FORMAT_INTZ: 2594 return true; 2595 default: 2596 return false; 2597 } 2598 } 2599 2600 bool Surface::isDepth(Format format) 2601 { 2602 switch(format) 2603 { 2604 case FORMAT_D32: 2605 case FORMAT_D16: 2606 case FORMAT_D24X8: 2607 case FORMAT_D24S8: 2608 case FORMAT_D24FS8: 2609 case FORMAT_D32F: 2610 case FORMAT_D32F_COMPLEMENTARY: 2611 case FORMAT_D32F_LOCKABLE: 2612 case FORMAT_DF24S8: 2613 case FORMAT_DF16S8: 2614 case FORMAT_D32FS8_TEXTURE: 2615 case FORMAT_D32FS8_SHADOW: 2616 case FORMAT_INTZ: 2617 return true; 2618 case FORMAT_S8: 2619 return false; 2620 default: 2621 return false; 2622 } 2623 } 2624 2625 bool Surface::isPalette(Format format) 2626 { 2627 switch(format) 2628 { 2629 case FORMAT_P8: 2630 case FORMAT_A8P8: 2631 return true; 2632 default: 2633 return false; 2634 } 2635 } 2636 2637 bool Surface::isFloatFormat(Format format) 2638 { 2639 switch(format) 2640 { 2641 case FORMAT_R5G6B5: 2642 case FORMAT_R8G8B8: 2643 case FORMAT_B8G8R8: 2644 case FORMAT_X8R8G8B8: 2645 case FORMAT_X8B8G8R8I: 2646 case FORMAT_X8B8G8R8: 2647 case FORMAT_A8R8G8B8: 2648 case FORMAT_A8B8G8R8I: 2649 case FORMAT_R8UI: 2650 case FORMAT_G8R8UI: 2651 case FORMAT_X8B8G8R8UI: 2652 case FORMAT_A8B8G8R8UI: 2653 case FORMAT_A8B8G8R8: 2654 case FORMAT_G8R8I: 2655 case FORMAT_G8R8: 2656 case FORMAT_A2B10G10R10: 2657 case FORMAT_R8I_SNORM: 2658 case FORMAT_G8R8I_SNORM: 2659 case FORMAT_X8B8G8R8I_SNORM: 2660 case FORMAT_A8B8G8R8I_SNORM: 2661 case FORMAT_R16I: 2662 case FORMAT_R16UI: 2663 case FORMAT_G16R16I: 2664 case FORMAT_G16R16UI: 2665 case FORMAT_G16R16: 2666 case FORMAT_X16B16G16R16I: 2667 case FORMAT_X16B16G16R16UI: 2668 case FORMAT_A16B16G16R16I: 2669 case FORMAT_A16B16G16R16UI: 2670 case FORMAT_A16B16G16R16: 2671 case FORMAT_V8U8: 2672 case FORMAT_Q8W8V8U8: 2673 case FORMAT_X8L8V8U8: 2674 case FORMAT_V16U16: 2675 case FORMAT_A16W16V16U16: 2676 case FORMAT_Q16W16V16U16: 2677 case FORMAT_A8: 2678 case FORMAT_R8I: 2679 case FORMAT_R8: 2680 case FORMAT_L8: 2681 case FORMAT_L16: 2682 case FORMAT_A8L8: 2683 case FORMAT_YV12_BT601: 2684 case FORMAT_YV12_BT709: 2685 case FORMAT_YV12_JFIF: 2686 case FORMAT_R32I: 2687 case FORMAT_R32UI: 2688 case FORMAT_G32R32I: 2689 case FORMAT_G32R32UI: 2690 case FORMAT_X32B32G32R32I: 2691 case FORMAT_X32B32G32R32UI: 2692 case FORMAT_A32B32G32R32I: 2693 case FORMAT_A32B32G32R32UI: 2694 return false; 2695 case FORMAT_R32F: 2696 case FORMAT_G32R32F: 2697 case FORMAT_X32B32G32R32F: 2698 case FORMAT_A32B32G32R32F: 2699 case FORMAT_D32F: 2700 case FORMAT_D32F_COMPLEMENTARY: 2701 case FORMAT_D32F_LOCKABLE: 2702 case FORMAT_D32FS8_TEXTURE: 2703 case FORMAT_D32FS8_SHADOW: 2704 case FORMAT_L16F: 2705 case FORMAT_A16L16F: 2706 case FORMAT_L32F: 2707 case FORMAT_A32L32F: 2708 return true; 2709 default: 2710 ASSERT(false); 2711 } 2712 2713 return false; 2714 } 2715 2716 bool Surface::isUnsignedComponent(Format format, int component) 2717 { 2718 switch(format) 2719 { 2720 case FORMAT_NULL: 2721 case FORMAT_R5G6B5: 2722 case FORMAT_R8G8B8: 2723 case FORMAT_B8G8R8: 2724 case FORMAT_X8R8G8B8: 2725 case FORMAT_X8B8G8R8: 2726 case FORMAT_A8R8G8B8: 2727 case FORMAT_A8B8G8R8: 2728 case FORMAT_G8R8: 2729 case FORMAT_A2B10G10R10: 2730 case FORMAT_R16UI: 2731 case FORMAT_G16R16: 2732 case FORMAT_G16R16UI: 2733 case FORMAT_X16B16G16R16UI: 2734 case FORMAT_A16B16G16R16: 2735 case FORMAT_A16B16G16R16UI: 2736 case FORMAT_R32UI: 2737 case FORMAT_G32R32UI: 2738 case FORMAT_X32B32G32R32UI: 2739 case FORMAT_A32B32G32R32UI: 2740 case FORMAT_R8UI: 2741 case FORMAT_G8R8UI: 2742 case FORMAT_X8B8G8R8UI: 2743 case FORMAT_A8B8G8R8UI: 2744 case FORMAT_D32F: 2745 case FORMAT_D32F_COMPLEMENTARY: 2746 case FORMAT_D32F_LOCKABLE: 2747 case FORMAT_D32FS8_TEXTURE: 2748 case FORMAT_D32FS8_SHADOW: 2749 case FORMAT_A8: 2750 case FORMAT_R8: 2751 case FORMAT_L8: 2752 case FORMAT_L16: 2753 case FORMAT_A8L8: 2754 case FORMAT_YV12_BT601: 2755 case FORMAT_YV12_BT709: 2756 case FORMAT_YV12_JFIF: 2757 return true; 2758 case FORMAT_A8B8G8R8I: 2759 case FORMAT_A16B16G16R16I: 2760 case FORMAT_A32B32G32R32I: 2761 case FORMAT_A8B8G8R8I_SNORM: 2762 case FORMAT_Q8W8V8U8: 2763 case FORMAT_Q16W16V16U16: 2764 case FORMAT_A32B32G32R32F: 2765 return false; 2766 case FORMAT_R32F: 2767 case FORMAT_R8I: 2768 case FORMAT_R16I: 2769 case FORMAT_R32I: 2770 case FORMAT_R8I_SNORM: 2771 return component >= 1; 2772 case FORMAT_V8U8: 2773 case FORMAT_X8L8V8U8: 2774 case FORMAT_V16U16: 2775 case FORMAT_G32R32F: 2776 case FORMAT_G8R8I: 2777 case FORMAT_G16R16I: 2778 case FORMAT_G32R32I: 2779 case FORMAT_G8R8I_SNORM: 2780 return component >= 2; 2781 case FORMAT_A16W16V16U16: 2782 case FORMAT_X32B32G32R32F: 2783 case FORMAT_X8B8G8R8I: 2784 case FORMAT_X16B16G16R16I: 2785 case FORMAT_X32B32G32R32I: 2786 case FORMAT_X8B8G8R8I_SNORM: 2787 return component >= 3; 2788 default: 2789 ASSERT(false); 2790 } 2791 2792 return false; 2793 } 2794 2795 bool Surface::isSRGBreadable(Format format) 2796 { 2797 // Keep in sync with Capabilities::isSRGBreadable 2798 switch(format) 2799 { 2800 case FORMAT_L8: 2801 case FORMAT_A8L8: 2802 case FORMAT_R8G8B8: 2803 case FORMAT_A8R8G8B8: 2804 case FORMAT_X8R8G8B8: 2805 case FORMAT_A8B8G8R8: 2806 case FORMAT_X8B8G8R8: 2807 case FORMAT_R5G6B5: 2808 case FORMAT_X1R5G5B5: 2809 case FORMAT_A1R5G5B5: 2810 case FORMAT_A4R4G4B4: 2811 #if S3TC_SUPPORT 2812 case FORMAT_DXT1: 2813 case FORMAT_DXT3: 2814 case FORMAT_DXT5: 2815 #endif 2816 case FORMAT_ATI1: 2817 case FORMAT_ATI2: 2818 return true; 2819 default: 2820 return false; 2821 } 2822 2823 return false; 2824 } 2825 2826 bool Surface::isSRGBwritable(Format format) 2827 { 2828 // Keep in sync with Capabilities::isSRGBwritable 2829 switch(format) 2830 { 2831 case FORMAT_NULL: 2832 case FORMAT_A8R8G8B8: 2833 case FORMAT_X8R8G8B8: 2834 case FORMAT_A8B8G8R8: 2835 case FORMAT_X8B8G8R8: 2836 case FORMAT_R5G6B5: 2837 return true; 2838 default: 2839 return false; 2840 } 2841 } 2842 2843 bool Surface::isCompressed(Format format) 2844 { 2845 switch(format) 2846 { 2847 #if S3TC_SUPPORT 2848 case FORMAT_DXT1: 2849 case FORMAT_DXT3: 2850 case FORMAT_DXT5: 2851 #endif 2852 case FORMAT_ATI1: 2853 case FORMAT_ATI2: 2854 case FORMAT_ETC1: 2855 case FORMAT_R11_EAC: 2856 case FORMAT_SIGNED_R11_EAC: 2857 case FORMAT_RG11_EAC: 2858 case FORMAT_SIGNED_RG11_EAC: 2859 case FORMAT_RGB8_ETC2: 2860 case FORMAT_SRGB8_ETC2: 2861 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2862 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2863 case FORMAT_RGBA8_ETC2_EAC: 2864 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2865 case FORMAT_RGBA_ASTC_4x4_KHR: 2866 case FORMAT_RGBA_ASTC_5x4_KHR: 2867 case FORMAT_RGBA_ASTC_5x5_KHR: 2868 case FORMAT_RGBA_ASTC_6x5_KHR: 2869 case FORMAT_RGBA_ASTC_6x6_KHR: 2870 case FORMAT_RGBA_ASTC_8x5_KHR: 2871 case FORMAT_RGBA_ASTC_8x6_KHR: 2872 case FORMAT_RGBA_ASTC_8x8_KHR: 2873 case FORMAT_RGBA_ASTC_10x5_KHR: 2874 case FORMAT_RGBA_ASTC_10x6_KHR: 2875 case FORMAT_RGBA_ASTC_10x8_KHR: 2876 case FORMAT_RGBA_ASTC_10x10_KHR: 2877 case FORMAT_RGBA_ASTC_12x10_KHR: 2878 case FORMAT_RGBA_ASTC_12x12_KHR: 2879 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2880 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2881 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2882 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2883 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2884 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2885 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2886 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2887 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2888 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2889 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2890 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2891 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2892 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2893 return true; 2894 default: 2895 return false; 2896 } 2897 } 2898 2899 bool Surface::isNonNormalizedInteger(Format format) 2900 { 2901 switch(format) 2902 { 2903 case FORMAT_A8B8G8R8I: 2904 case FORMAT_X8B8G8R8I: 2905 case FORMAT_G8R8I: 2906 case FORMAT_R8I: 2907 case FORMAT_A8B8G8R8UI: 2908 case FORMAT_X8B8G8R8UI: 2909 case FORMAT_G8R8UI: 2910 case FORMAT_R8UI: 2911 case FORMAT_A16B16G16R16I: 2912 case FORMAT_X16B16G16R16I: 2913 case FORMAT_G16R16I: 2914 case FORMAT_R16I: 2915 case FORMAT_A16B16G16R16UI: 2916 case FORMAT_X16B16G16R16UI: 2917 case FORMAT_G16R16UI: 2918 case FORMAT_R16UI: 2919 case FORMAT_A32B32G32R32I: 2920 case FORMAT_X32B32G32R32I: 2921 case FORMAT_G32R32I: 2922 case FORMAT_R32I: 2923 case FORMAT_A32B32G32R32UI: 2924 case FORMAT_X32B32G32R32UI: 2925 case FORMAT_G32R32UI: 2926 case FORMAT_R32UI: 2927 return true; 2928 default: 2929 return false; 2930 } 2931 } 2932 2933 int Surface::componentCount(Format format) 2934 { 2935 switch(format) 2936 { 2937 case FORMAT_R5G6B5: return 3; 2938 case FORMAT_X8R8G8B8: return 3; 2939 case FORMAT_X8B8G8R8I: return 3; 2940 case FORMAT_X8B8G8R8: return 3; 2941 case FORMAT_A8R8G8B8: return 4; 2942 case FORMAT_A8B8G8R8I: return 4; 2943 case FORMAT_A8B8G8R8: return 4; 2944 case FORMAT_G8R8I: return 2; 2945 case FORMAT_G8R8: return 2; 2946 case FORMAT_R8I_SNORM: return 1; 2947 case FORMAT_G8R8I_SNORM: return 2; 2948 case FORMAT_X8B8G8R8I_SNORM:return 3; 2949 case FORMAT_A8B8G8R8I_SNORM:return 4; 2950 case FORMAT_R8UI: return 1; 2951 case FORMAT_G8R8UI: return 2; 2952 case FORMAT_X8B8G8R8UI: return 3; 2953 case FORMAT_A8B8G8R8UI: return 4; 2954 case FORMAT_A2B10G10R10: return 4; 2955 case FORMAT_G16R16I: return 2; 2956 case FORMAT_G16R16UI: return 2; 2957 case FORMAT_G16R16: return 2; 2958 case FORMAT_G32R32I: return 2; 2959 case FORMAT_G32R32UI: return 2; 2960 case FORMAT_X16B16G16R16I: return 3; 2961 case FORMAT_X16B16G16R16UI: return 3; 2962 case FORMAT_A16B16G16R16I: return 4; 2963 case FORMAT_A16B16G16R16UI: return 4; 2964 case FORMAT_A16B16G16R16: return 4; 2965 case FORMAT_X32B32G32R32I: return 3; 2966 case FORMAT_X32B32G32R32UI: return 3; 2967 case FORMAT_A32B32G32R32I: return 4; 2968 case FORMAT_A32B32G32R32UI: return 4; 2969 case FORMAT_V8U8: return 2; 2970 case FORMAT_Q8W8V8U8: return 4; 2971 case FORMAT_X8L8V8U8: return 3; 2972 case FORMAT_V16U16: return 2; 2973 case FORMAT_A16W16V16U16: return 4; 2974 case FORMAT_Q16W16V16U16: return 4; 2975 case FORMAT_R32F: return 1; 2976 case FORMAT_G32R32F: return 2; 2977 case FORMAT_X32B32G32R32F: return 3; 2978 case FORMAT_A32B32G32R32F: return 4; 2979 case FORMAT_D32F: return 1; 2980 case FORMAT_D32F_LOCKABLE: return 1; 2981 case FORMAT_D32FS8_TEXTURE: return 1; 2982 case FORMAT_D32FS8_SHADOW: return 1; 2983 case FORMAT_A8: return 1; 2984 case FORMAT_R8I: return 1; 2985 case FORMAT_R8: return 1; 2986 case FORMAT_R16I: return 1; 2987 case FORMAT_R16UI: return 1; 2988 case FORMAT_R32I: return 1; 2989 case FORMAT_R32UI: return 1; 2990 case FORMAT_L8: return 1; 2991 case FORMAT_L16: return 1; 2992 case FORMAT_A8L8: return 2; 2993 case FORMAT_YV12_BT601: return 3; 2994 case FORMAT_YV12_BT709: return 3; 2995 case FORMAT_YV12_JFIF: return 3; 2996 default: 2997 ASSERT(false); 2998 } 2999 3000 return 1; 3001 } 3002 3003 void *Surface::allocateBuffer(int width, int height, int depth, Format format) 3004 { 3005 // Render targets require 2x2 quads 3006 int width2 = (width + 1) & ~1; 3007 int height2 = (height + 1) & ~1; 3008 3009 // FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes, 3010 // so we have to allocate 4 extra bytes to avoid buffer overruns. 3011 return allocateZero(size(width2, height2, depth, format) + 4); 3012 } 3013 3014 void Surface::memfill4(void *buffer, int pattern, int bytes) 3015 { 3016 while((size_t)buffer & 0x1 && bytes >= 1) 3017 { 3018 *(char*)buffer = (char)pattern; 3019 (char*&)buffer += 1; 3020 bytes -= 1; 3021 } 3022 3023 while((size_t)buffer & 0x3 && bytes >= 2) 3024 { 3025 *(short*)buffer = (short)pattern; 3026 (short*&)buffer += 1; 3027 bytes -= 2; 3028 } 3029 3030 if(CPUID::supportsSSE()) 3031 { 3032 while((size_t)buffer & 0xF && bytes >= 4) 3033 { 3034 *(int*)buffer = pattern; 3035 (int*&)buffer += 1; 3036 bytes -= 4; 3037 } 3038 3039 __m128 quad = _mm_set_ps1((float&)pattern); 3040 3041 float *pointer = (float*)buffer; 3042 int qxwords = bytes / 64; 3043 bytes -= qxwords * 64; 3044 3045 while(qxwords--) 3046 { 3047 _mm_stream_ps(pointer + 0, quad); 3048 _mm_stream_ps(pointer + 4, quad); 3049 _mm_stream_ps(pointer + 8, quad); 3050 _mm_stream_ps(pointer + 12, quad); 3051 3052 pointer += 16; 3053 } 3054 3055 buffer = pointer; 3056 } 3057 3058 while(bytes >= 4) 3059 { 3060 *(int*)buffer = (int)pattern; 3061 (int*&)buffer += 1; 3062 bytes -= 4; 3063 } 3064 3065 while(bytes >= 2) 3066 { 3067 *(short*)buffer = (short)pattern; 3068 (short*&)buffer += 1; 3069 bytes -= 2; 3070 } 3071 3072 while(bytes >= 1) 3073 { 3074 *(char*)buffer = (char)pattern; 3075 (char*&)buffer += 1; 3076 bytes -= 1; 3077 } 3078 } 3079 3080 bool Surface::isEntire(const SliceRect& rect) const 3081 { 3082 return (rect.x0 == 0 && rect.y0 == 0 && rect.x1 == internal.width && rect.y1 == internal.height && internal.depth == 1); 3083 } 3084 3085 SliceRect Surface::getRect() const 3086 { 3087 return SliceRect(0, 0, internal.width, internal.height, 0); 3088 } 3089 3090 void Surface::clearDepth(float depth, int x0, int y0, int width, int height) 3091 { 3092 if(width == 0 || height == 0) return; 3093 3094 // Not overlapping 3095 if(x0 > internal.width) return; 3096 if(y0 > internal.height) return; 3097 if(x0 + width < 0) return; 3098 if(y0 + height < 0) return; 3099 3100 // Clip against dimensions 3101 if(x0 < 0) {width += x0; x0 = 0;} 3102 if(x0 + width > internal.width) width = internal.width - x0; 3103 if(y0 < 0) {height += y0; y0 = 0;} 3104 if(y0 + height > internal.height) height = internal.height - y0; 3105 3106 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3107 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY; 3108 3109 int width2 = (internal.width + 1) & ~1; 3110 3111 int x1 = x0 + width; 3112 int y1 = y0 + height; 3113 3114 if(internal.format == FORMAT_D32F_LOCKABLE || 3115 internal.format == FORMAT_D32FS8_TEXTURE || 3116 internal.format == FORMAT_D32FS8_SHADOW) 3117 { 3118 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0; 3119 3120 for(int z = 0; z < internal.depth; z++) 3121 { 3122 for(int y = y0; y < y1; y++) 3123 { 3124 memfill4(target, (int&)depth, 4 * width); 3125 target += width2; 3126 } 3127 } 3128 3129 unlockInternal(); 3130 } 3131 else // Quad layout 3132 { 3133 if(complementaryDepthBuffer) 3134 { 3135 depth = 1 - depth; 3136 } 3137 3138 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3139 3140 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3141 int oddX1 = (x1 & ~1) * 2; 3142 int evenX0 = ((x0 + 1) & ~1) * 2; 3143 int evenBytes = (oddX1 - evenX0) * sizeof(float); 3144 3145 for(int z = 0; z < internal.depth; z++) 3146 { 3147 for(int y = y0; y < y1; y++) 3148 { 3149 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3150 3151 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3152 { 3153 if((x0 & 1) != 0) 3154 { 3155 target[oddX0 + 0] = depth; 3156 target[oddX0 + 2] = depth; 3157 } 3158 3159 // for(int x2 = evenX0; x2 < x1 * 2; x2 += 4) 3160 // { 3161 // target[x2 + 0] = depth; 3162 // target[x2 + 1] = depth; 3163 // target[x2 + 2] = depth; 3164 // target[x2 + 3] = depth; 3165 // } 3166 3167 // __asm 3168 // { 3169 // movss xmm0, depth 3170 // shufps xmm0, xmm0, 0x00 3171 // 3172 // mov eax, x0 3173 // add eax, 1 3174 // and eax, 0xFFFFFFFE 3175 // cmp eax, x1 3176 // jge qEnd 3177 // 3178 // mov edi, target 3179 // 3180 // qLoop: 3181 // movntps [edi+8*eax], xmm0 3182 // 3183 // add eax, 2 3184 // cmp eax, x1 3185 // jl qLoop 3186 // qEnd: 3187 // } 3188 3189 memfill4(&target[evenX0], (int&)depth, evenBytes); 3190 3191 if((x1 & 1) != 0) 3192 { 3193 target[oddX1 + 0] = depth; 3194 target[oddX1 + 2] = depth; 3195 } 3196 3197 y++; 3198 } 3199 else 3200 { 3201 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3202 { 3203 target[i] = depth; 3204 } 3205 } 3206 } 3207 3208 buffer += internal.sliceP; 3209 } 3210 3211 unlockInternal(); 3212 } 3213 } 3214 3215 void Surface::clearStencil(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3216 { 3217 if(mask == 0 || width == 0 || height == 0) return; 3218 3219 // Not overlapping 3220 if(x0 > internal.width) return; 3221 if(y0 > internal.height) return; 3222 if(x0 + width < 0) return; 3223 if(y0 + height < 0) return; 3224 3225 // Clip against dimensions 3226 if(x0 < 0) {width += x0; x0 = 0;} 3227 if(x0 + width > internal.width) width = internal.width - x0; 3228 if(y0 < 0) {height += y0; y0 = 0;} 3229 if(y0 + height > internal.height) height = internal.height - y0; 3230 3231 int width2 = (internal.width + 1) & ~1; 3232 3233 int x1 = x0 + width; 3234 int y1 = y0 + height; 3235 3236 int oddX0 = (x0 & ~1) * 2 + (x0 & 1); 3237 int oddX1 = (x1 & ~1) * 2; 3238 int evenX0 = ((x0 + 1) & ~1) * 2; 3239 int evenBytes = oddX1 - evenX0; 3240 3241 unsigned char maskedS = s & mask; 3242 unsigned char invMask = ~mask; 3243 unsigned int fill = maskedS; 3244 fill = fill | (fill << 8) | (fill << 16) + (fill << 24); 3245 3246 char *buffer = (char*)lockStencil(0, PUBLIC); 3247 3248 // Stencil buffers are assumed to use quad layout 3249 for(int z = 0; z < stencil.depth; z++) 3250 { 3251 for(int y = y0; y < y1; y++) 3252 { 3253 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3254 3255 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3256 { 3257 if((x0 & 1) != 0) 3258 { 3259 target[oddX0 + 0] = fill; 3260 target[oddX0 + 2] = fill; 3261 } 3262 3263 memfill4(&target[evenX0], fill, evenBytes); 3264 3265 if((x1 & 1) != 0) 3266 { 3267 target[oddX1 + 0] = fill; 3268 target[oddX1 + 2] = fill; 3269 } 3270 3271 y++; 3272 } 3273 else 3274 { 3275 for(int x = x0, i = oddX0; x < x1; x++, i = (x & ~1) * 2 + (x & 1)) 3276 { 3277 target[i] = maskedS | (target[i] & invMask); 3278 } 3279 } 3280 } 3281 3282 buffer += stencil.sliceP; 3283 } 3284 3285 unlockStencil(); 3286 } 3287 3288 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3289 { 3290 unsigned char *row; 3291 Buffer *buffer; 3292 3293 if(internal.dirty) 3294 { 3295 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3296 buffer = &internal; 3297 } 3298 else 3299 { 3300 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3301 buffer = &external; 3302 } 3303 3304 if(buffer->bytes <= 4) 3305 { 3306 int c; 3307 buffer->write(&c, color); 3308 3309 if(buffer->bytes <= 1) c = (c << 8) | c; 3310 if(buffer->bytes <= 2) c = (c << 16) | c; 3311 3312 for(int y = 0; y < height; y++) 3313 { 3314 memfill4(row, c, width * buffer->bytes); 3315 3316 row += buffer->pitchB; 3317 } 3318 } 3319 else // Generic 3320 { 3321 for(int y = 0; y < height; y++) 3322 { 3323 unsigned char *element = row; 3324 3325 for(int x = 0; x < width; x++) 3326 { 3327 buffer->write(element, color); 3328 3329 element += buffer->bytes; 3330 } 3331 3332 row += buffer->pitchB; 3333 } 3334 } 3335 3336 if(buffer == &internal) 3337 { 3338 unlockInternal(); 3339 } 3340 else 3341 { 3342 unlockExternal(); 3343 } 3344 } 3345 3346 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter) 3347 { 3348 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3349 3350 sw::Color<float> color; 3351 3352 if(!filter) 3353 { 3354 color = source->internal.read((int)srcX, (int)srcY); 3355 } 3356 else // Bilinear filtering 3357 { 3358 color = source->internal.sample(srcX, srcY); 3359 } 3360 3361 internal.write(x, y, color); 3362 } 3363 3364 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3365 { 3366 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3367 3368 sw::Color<float> color; 3369 3370 if(!filter) 3371 { 3372 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3373 } 3374 else // Bilinear filtering 3375 { 3376 color = source->internal.sample(srcX, srcY, srcZ); 3377 } 3378 3379 internal.write(x, y, z, color); 3380 } 3381 3382 bool Surface::hasStencil() const 3383 { 3384 return isStencil(external.format); 3385 } 3386 3387 bool Surface::hasDepth() const 3388 { 3389 return isDepth(external.format); 3390 } 3391 3392 bool Surface::hasPalette() const 3393 { 3394 return isPalette(external.format); 3395 } 3396 3397 bool Surface::isRenderTarget() const 3398 { 3399 return renderTarget; 3400 } 3401 3402 bool Surface::hasDirtyMipmaps() const 3403 { 3404 return dirtyMipmaps; 3405 } 3406 3407 void Surface::cleanMipmaps() 3408 { 3409 dirtyMipmaps = false; 3410 } 3411 3412 Resource *Surface::getResource() 3413 { 3414 return resource; 3415 } 3416 3417 bool Surface::identicalFormats() const 3418 { 3419 return external.format == internal.format && 3420 external.width == internal.width && 3421 external.height == internal.height && 3422 external.depth == internal.depth && 3423 external.pitchB == internal.pitchB && 3424 external.sliceB == internal.sliceB; 3425 } 3426 3427 Format Surface::selectInternalFormat(Format format) const 3428 { 3429 switch(format) 3430 { 3431 case FORMAT_NULL: 3432 return FORMAT_NULL; 3433 case FORMAT_P8: 3434 case FORMAT_A8P8: 3435 case FORMAT_A4R4G4B4: 3436 case FORMAT_A1R5G5B5: 3437 case FORMAT_A8R3G3B2: 3438 return FORMAT_A8R8G8B8; 3439 case FORMAT_A8: 3440 return FORMAT_A8; 3441 case FORMAT_R8I: 3442 return FORMAT_R8I; 3443 case FORMAT_R8UI: 3444 return FORMAT_R8UI; 3445 case FORMAT_R8I_SNORM: 3446 return FORMAT_R8I_SNORM; 3447 case FORMAT_R8: 3448 return FORMAT_R8; 3449 case FORMAT_R16I: 3450 return FORMAT_R16I; 3451 case FORMAT_R16UI: 3452 return FORMAT_R16UI; 3453 case FORMAT_R32I: 3454 return FORMAT_R32I; 3455 case FORMAT_R32UI: 3456 return FORMAT_R32UI; 3457 case FORMAT_X16B16G16R16I: 3458 case FORMAT_A16B16G16R16I: 3459 return FORMAT_A16B16G16R16I; 3460 case FORMAT_X16B16G16R16UI: 3461 case FORMAT_A16B16G16R16UI: 3462 return FORMAT_A16B16G16R16UI; 3463 case FORMAT_A2R10G10B10: 3464 case FORMAT_A2B10G10R10: 3465 case FORMAT_A16B16G16R16: 3466 return FORMAT_A16B16G16R16; 3467 case FORMAT_X32B32G32R32I: 3468 case FORMAT_A32B32G32R32I: 3469 return FORMAT_A32B32G32R32I; 3470 case FORMAT_X32B32G32R32UI: 3471 case FORMAT_A32B32G32R32UI: 3472 return FORMAT_A32B32G32R32UI; 3473 case FORMAT_G8R8I: 3474 return FORMAT_G8R8I; 3475 case FORMAT_G8R8UI: 3476 return FORMAT_G8R8UI; 3477 case FORMAT_G8R8I_SNORM: 3478 return FORMAT_G8R8I_SNORM; 3479 case FORMAT_G8R8: 3480 return FORMAT_G8R8; 3481 case FORMAT_G16R16I: 3482 return FORMAT_G16R16I; 3483 case FORMAT_G16R16UI: 3484 return FORMAT_G16R16UI; 3485 case FORMAT_G16R16: 3486 return FORMAT_G16R16; 3487 case FORMAT_G32R32I: 3488 return FORMAT_G32R32I; 3489 case FORMAT_G32R32UI: 3490 return FORMAT_G32R32UI; 3491 case FORMAT_A8R8G8B8: 3492 if(lockable || !quadLayoutEnabled) 3493 { 3494 return FORMAT_A8R8G8B8; 3495 } 3496 else 3497 { 3498 return FORMAT_A8G8R8B8Q; 3499 } 3500 case FORMAT_A8B8G8R8I: 3501 return FORMAT_A8B8G8R8I; 3502 case FORMAT_A8B8G8R8UI: 3503 return FORMAT_A8B8G8R8UI; 3504 case FORMAT_A8B8G8R8I_SNORM: 3505 return FORMAT_A8B8G8R8I_SNORM; 3506 case FORMAT_R5G5B5A1: 3507 case FORMAT_R4G4B4A4: 3508 case FORMAT_A8B8G8R8: 3509 return FORMAT_A8B8G8R8; 3510 case FORMAT_R5G6B5: 3511 return FORMAT_R5G6B5; 3512 case FORMAT_R3G3B2: 3513 case FORMAT_R8G8B8: 3514 case FORMAT_X4R4G4B4: 3515 case FORMAT_X1R5G5B5: 3516 case FORMAT_X8R8G8B8: 3517 if(lockable || !quadLayoutEnabled) 3518 { 3519 return FORMAT_X8R8G8B8; 3520 } 3521 else 3522 { 3523 return FORMAT_X8G8R8B8Q; 3524 } 3525 case FORMAT_X8B8G8R8I: 3526 return FORMAT_X8B8G8R8I; 3527 case FORMAT_X8B8G8R8UI: 3528 return FORMAT_X8B8G8R8UI; 3529 case FORMAT_X8B8G8R8I_SNORM: 3530 return FORMAT_X8B8G8R8I_SNORM; 3531 case FORMAT_B8G8R8: 3532 case FORMAT_X8B8G8R8: 3533 return FORMAT_X8B8G8R8; 3534 // Compressed formats 3535 #if S3TC_SUPPORT 3536 case FORMAT_DXT1: 3537 case FORMAT_DXT3: 3538 case FORMAT_DXT5: 3539 #endif 3540 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3541 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3542 case FORMAT_RGBA8_ETC2_EAC: 3543 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3544 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3545 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3546 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3547 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3548 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3549 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3550 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3551 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3552 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3553 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3554 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3555 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3556 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3557 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3558 return FORMAT_A8R8G8B8; 3559 case FORMAT_RGBA_ASTC_4x4_KHR: 3560 case FORMAT_RGBA_ASTC_5x4_KHR: 3561 case FORMAT_RGBA_ASTC_5x5_KHR: 3562 case FORMAT_RGBA_ASTC_6x5_KHR: 3563 case FORMAT_RGBA_ASTC_6x6_KHR: 3564 case FORMAT_RGBA_ASTC_8x5_KHR: 3565 case FORMAT_RGBA_ASTC_8x6_KHR: 3566 case FORMAT_RGBA_ASTC_8x8_KHR: 3567 case FORMAT_RGBA_ASTC_10x5_KHR: 3568 case FORMAT_RGBA_ASTC_10x6_KHR: 3569 case FORMAT_RGBA_ASTC_10x8_KHR: 3570 case FORMAT_RGBA_ASTC_10x10_KHR: 3571 case FORMAT_RGBA_ASTC_12x10_KHR: 3572 case FORMAT_RGBA_ASTC_12x12_KHR: 3573 // ASTC supports HDR, so a floating point format is required to represent it properly 3574 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported 3575 case FORMAT_ATI1: 3576 case FORMAT_R11_EAC: 3577 return FORMAT_R8; 3578 case FORMAT_SIGNED_R11_EAC: 3579 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3580 case FORMAT_ATI2: 3581 case FORMAT_RG11_EAC: 3582 return FORMAT_G8R8; 3583 case FORMAT_SIGNED_RG11_EAC: 3584 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3585 case FORMAT_ETC1: 3586 case FORMAT_RGB8_ETC2: 3587 case FORMAT_SRGB8_ETC2: 3588 return FORMAT_X8R8G8B8; 3589 // Bumpmap formats 3590 case FORMAT_V8U8: return FORMAT_V8U8; 3591 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3592 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3593 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3594 case FORMAT_V16U16: return FORMAT_V16U16; 3595 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3596 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3597 // Floating-point formats 3598 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3599 case FORMAT_R16F: return FORMAT_R32F; 3600 case FORMAT_G16R16F: return FORMAT_G32R32F; 3601 case FORMAT_B16G16R16F: return FORMAT_X32B32G32R32F; 3602 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3603 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3604 case FORMAT_R32F: return FORMAT_R32F; 3605 case FORMAT_G32R32F: return FORMAT_G32R32F; 3606 case FORMAT_B32G32R32F: return FORMAT_X32B32G32R32F; 3607 case FORMAT_X32B32G32R32F: return FORMAT_X32B32G32R32F; 3608 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3609 // Luminance formats 3610 case FORMAT_L8: return FORMAT_L8; 3611 case FORMAT_A4L4: return FORMAT_A8L8; 3612 case FORMAT_L16: return FORMAT_L16; 3613 case FORMAT_A8L8: return FORMAT_A8L8; 3614 case FORMAT_L16F: return FORMAT_X32B32G32R32F; 3615 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3616 case FORMAT_L32F: return FORMAT_X32B32G32R32F; 3617 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3618 // Depth/stencil formats 3619 case FORMAT_D16: 3620 case FORMAT_D32: 3621 case FORMAT_D24X8: 3622 case FORMAT_D24S8: 3623 case FORMAT_D24FS8: 3624 if(hasParent) // Texture 3625 { 3626 return FORMAT_D32FS8_SHADOW; 3627 } 3628 else if(complementaryDepthBuffer) 3629 { 3630 return FORMAT_D32F_COMPLEMENTARY; 3631 } 3632 else 3633 { 3634 return FORMAT_D32F; 3635 } 3636 case FORMAT_D32F: return FORMAT_D32F; 3637 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 3638 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 3639 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 3640 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 3641 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 3642 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 3643 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 3644 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 3645 default: 3646 ASSERT(false); 3647 } 3648 3649 return FORMAT_NULL; 3650 } 3651 3652 void Surface::setTexturePalette(unsigned int *palette) 3653 { 3654 Surface::palette = palette; 3655 Surface::paletteID++; 3656 } 3657 3658 void Surface::resolve() 3659 { 3660 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL) 3661 { 3662 return; 3663 } 3664 3665 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 3666 3667 int quality = internal.depth; 3668 int width = internal.width; 3669 int height = internal.height; 3670 int pitch = internal.pitchB; 3671 int slice = internal.sliceB; 3672 3673 unsigned char *source0 = (unsigned char*)source; 3674 unsigned char *source1 = source0 + slice; 3675 unsigned char *source2 = source1 + slice; 3676 unsigned char *source3 = source2 + slice; 3677 unsigned char *source4 = source3 + slice; 3678 unsigned char *source5 = source4 + slice; 3679 unsigned char *source6 = source5 + slice; 3680 unsigned char *source7 = source6 + slice; 3681 unsigned char *source8 = source7 + slice; 3682 unsigned char *source9 = source8 + slice; 3683 unsigned char *sourceA = source9 + slice; 3684 unsigned char *sourceB = sourceA + slice; 3685 unsigned char *sourceC = sourceB + slice; 3686 unsigned char *sourceD = sourceC + slice; 3687 unsigned char *sourceE = sourceD + slice; 3688 unsigned char *sourceF = sourceE + slice; 3689 3690 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8) 3691 { 3692 if(CPUID::supportsSSE2() && (width % 4) == 0) 3693 { 3694 if(internal.depth == 2) 3695 { 3696 for(int y = 0; y < height; y++) 3697 { 3698 for(int x = 0; x < width; x += 4) 3699 { 3700 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3701 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3702 3703 c0 = _mm_avg_epu8(c0, c1); 3704 3705 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3706 } 3707 3708 source0 += pitch; 3709 source1 += pitch; 3710 } 3711 } 3712 else if(internal.depth == 4) 3713 { 3714 for(int y = 0; y < height; y++) 3715 { 3716 for(int x = 0; x < width; x += 4) 3717 { 3718 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3719 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3720 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3721 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3722 3723 c0 = _mm_avg_epu8(c0, c1); 3724 c2 = _mm_avg_epu8(c2, c3); 3725 c0 = _mm_avg_epu8(c0, c2); 3726 3727 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3728 } 3729 3730 source0 += pitch; 3731 source1 += pitch; 3732 source2 += pitch; 3733 source3 += pitch; 3734 } 3735 } 3736 else if(internal.depth == 8) 3737 { 3738 for(int y = 0; y < height; y++) 3739 { 3740 for(int x = 0; x < width; x += 4) 3741 { 3742 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3743 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3744 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3745 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3746 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3747 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3748 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3749 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3750 3751 c0 = _mm_avg_epu8(c0, c1); 3752 c2 = _mm_avg_epu8(c2, c3); 3753 c4 = _mm_avg_epu8(c4, c5); 3754 c6 = _mm_avg_epu8(c6, c7); 3755 c0 = _mm_avg_epu8(c0, c2); 3756 c4 = _mm_avg_epu8(c4, c6); 3757 c0 = _mm_avg_epu8(c0, c4); 3758 3759 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3760 } 3761 3762 source0 += pitch; 3763 source1 += pitch; 3764 source2 += pitch; 3765 source3 += pitch; 3766 source4 += pitch; 3767 source5 += pitch; 3768 source6 += pitch; 3769 source7 += pitch; 3770 } 3771 } 3772 else if(internal.depth == 16) 3773 { 3774 for(int y = 0; y < height; y++) 3775 { 3776 for(int x = 0; x < width; x += 4) 3777 { 3778 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3779 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3780 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3781 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3782 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3783 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3784 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3785 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3786 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 3787 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 3788 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 3789 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 3790 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 3791 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 3792 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 3793 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 3794 3795 c0 = _mm_avg_epu8(c0, c1); 3796 c2 = _mm_avg_epu8(c2, c3); 3797 c4 = _mm_avg_epu8(c4, c5); 3798 c6 = _mm_avg_epu8(c6, c7); 3799 c8 = _mm_avg_epu8(c8, c9); 3800 cA = _mm_avg_epu8(cA, cB); 3801 cC = _mm_avg_epu8(cC, cD); 3802 cE = _mm_avg_epu8(cE, cF); 3803 c0 = _mm_avg_epu8(c0, c2); 3804 c4 = _mm_avg_epu8(c4, c6); 3805 c8 = _mm_avg_epu8(c8, cA); 3806 cC = _mm_avg_epu8(cC, cE); 3807 c0 = _mm_avg_epu8(c0, c4); 3808 c8 = _mm_avg_epu8(c8, cC); 3809 c0 = _mm_avg_epu8(c0, c8); 3810 3811 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3812 } 3813 3814 source0 += pitch; 3815 source1 += pitch; 3816 source2 += pitch; 3817 source3 += pitch; 3818 source4 += pitch; 3819 source5 += pitch; 3820 source6 += pitch; 3821 source7 += pitch; 3822 source8 += pitch; 3823 source9 += pitch; 3824 sourceA += pitch; 3825 sourceB += pitch; 3826 sourceC += pitch; 3827 sourceD += pitch; 3828 sourceE += pitch; 3829 sourceF += pitch; 3830 } 3831 } 3832 else ASSERT(false); 3833 } 3834 else 3835 { 3836 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 3837 3838 if(internal.depth == 2) 3839 { 3840 for(int y = 0; y < height; y++) 3841 { 3842 for(int x = 0; x < width; x++) 3843 { 3844 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3845 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3846 3847 c0 = AVERAGE(c0, c1); 3848 3849 *(unsigned int*)(source0 + 4 * x) = c0; 3850 } 3851 3852 source0 += pitch; 3853 source1 += pitch; 3854 } 3855 } 3856 else if(internal.depth == 4) 3857 { 3858 for(int y = 0; y < height; y++) 3859 { 3860 for(int x = 0; x < width; x++) 3861 { 3862 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3863 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3864 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3865 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3866 3867 c0 = AVERAGE(c0, c1); 3868 c2 = AVERAGE(c2, c3); 3869 c0 = AVERAGE(c0, c2); 3870 3871 *(unsigned int*)(source0 + 4 * x) = c0; 3872 } 3873 3874 source0 += pitch; 3875 source1 += pitch; 3876 source2 += pitch; 3877 source3 += pitch; 3878 } 3879 } 3880 else if(internal.depth == 8) 3881 { 3882 for(int y = 0; y < height; y++) 3883 { 3884 for(int x = 0; x < width; x++) 3885 { 3886 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3887 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3888 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3889 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3890 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3891 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3892 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3893 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3894 3895 c0 = AVERAGE(c0, c1); 3896 c2 = AVERAGE(c2, c3); 3897 c4 = AVERAGE(c4, c5); 3898 c6 = AVERAGE(c6, c7); 3899 c0 = AVERAGE(c0, c2); 3900 c4 = AVERAGE(c4, c6); 3901 c0 = AVERAGE(c0, c4); 3902 3903 *(unsigned int*)(source0 + 4 * x) = c0; 3904 } 3905 3906 source0 += pitch; 3907 source1 += pitch; 3908 source2 += pitch; 3909 source3 += pitch; 3910 source4 += pitch; 3911 source5 += pitch; 3912 source6 += pitch; 3913 source7 += pitch; 3914 } 3915 } 3916 else if(internal.depth == 16) 3917 { 3918 for(int y = 0; y < height; y++) 3919 { 3920 for(int x = 0; x < width; x++) 3921 { 3922 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3923 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3924 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3925 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3926 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3927 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3928 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3929 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3930 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 3931 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 3932 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 3933 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 3934 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 3935 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 3936 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 3937 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 3938 3939 c0 = AVERAGE(c0, c1); 3940 c2 = AVERAGE(c2, c3); 3941 c4 = AVERAGE(c4, c5); 3942 c6 = AVERAGE(c6, c7); 3943 c8 = AVERAGE(c8, c9); 3944 cA = AVERAGE(cA, cB); 3945 cC = AVERAGE(cC, cD); 3946 cE = AVERAGE(cE, cF); 3947 c0 = AVERAGE(c0, c2); 3948 c4 = AVERAGE(c4, c6); 3949 c8 = AVERAGE(c8, cA); 3950 cC = AVERAGE(cC, cE); 3951 c0 = AVERAGE(c0, c4); 3952 c8 = AVERAGE(c8, cC); 3953 c0 = AVERAGE(c0, c8); 3954 3955 *(unsigned int*)(source0 + 4 * x) = c0; 3956 } 3957 3958 source0 += pitch; 3959 source1 += pitch; 3960 source2 += pitch; 3961 source3 += pitch; 3962 source4 += pitch; 3963 source5 += pitch; 3964 source6 += pitch; 3965 source7 += pitch; 3966 source8 += pitch; 3967 source9 += pitch; 3968 sourceA += pitch; 3969 sourceB += pitch; 3970 sourceC += pitch; 3971 sourceD += pitch; 3972 sourceE += pitch; 3973 sourceF += pitch; 3974 } 3975 } 3976 else ASSERT(false); 3977 3978 #undef AVERAGE 3979 } 3980 } 3981 else if(internal.format == FORMAT_G16R16) 3982 { 3983 if(CPUID::supportsSSE2() && (width % 4) == 0) 3984 { 3985 if(internal.depth == 2) 3986 { 3987 for(int y = 0; y < height; y++) 3988 { 3989 for(int x = 0; x < width; x += 4) 3990 { 3991 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3992 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3993 3994 c0 = _mm_avg_epu16(c0, c1); 3995 3996 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3997 } 3998 3999 source0 += pitch; 4000 source1 += pitch; 4001 } 4002 } 4003 else if(internal.depth == 4) 4004 { 4005 for(int y = 0; y < height; y++) 4006 { 4007 for(int x = 0; x < width; x += 4) 4008 { 4009 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4010 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4011 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4012 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4013 4014 c0 = _mm_avg_epu16(c0, c1); 4015 c2 = _mm_avg_epu16(c2, c3); 4016 c0 = _mm_avg_epu16(c0, c2); 4017 4018 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4019 } 4020 4021 source0 += pitch; 4022 source1 += pitch; 4023 source2 += pitch; 4024 source3 += pitch; 4025 } 4026 } 4027 else if(internal.depth == 8) 4028 { 4029 for(int y = 0; y < height; y++) 4030 { 4031 for(int x = 0; x < width; x += 4) 4032 { 4033 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4034 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4035 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4036 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4037 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4038 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4039 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4040 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4041 4042 c0 = _mm_avg_epu16(c0, c1); 4043 c2 = _mm_avg_epu16(c2, c3); 4044 c4 = _mm_avg_epu16(c4, c5); 4045 c6 = _mm_avg_epu16(c6, c7); 4046 c0 = _mm_avg_epu16(c0, c2); 4047 c4 = _mm_avg_epu16(c4, c6); 4048 c0 = _mm_avg_epu16(c0, c4); 4049 4050 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4051 } 4052 4053 source0 += pitch; 4054 source1 += pitch; 4055 source2 += pitch; 4056 source3 += pitch; 4057 source4 += pitch; 4058 source5 += pitch; 4059 source6 += pitch; 4060 source7 += pitch; 4061 } 4062 } 4063 else if(internal.depth == 16) 4064 { 4065 for(int y = 0; y < height; y++) 4066 { 4067 for(int x = 0; x < width; x += 4) 4068 { 4069 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4070 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4071 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4072 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4073 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4074 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4075 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4076 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4077 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4078 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4079 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4080 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4081 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4082 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4083 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4084 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4085 4086 c0 = _mm_avg_epu16(c0, c1); 4087 c2 = _mm_avg_epu16(c2, c3); 4088 c4 = _mm_avg_epu16(c4, c5); 4089 c6 = _mm_avg_epu16(c6, c7); 4090 c8 = _mm_avg_epu16(c8, c9); 4091 cA = _mm_avg_epu16(cA, cB); 4092 cC = _mm_avg_epu16(cC, cD); 4093 cE = _mm_avg_epu16(cE, cF); 4094 c0 = _mm_avg_epu16(c0, c2); 4095 c4 = _mm_avg_epu16(c4, c6); 4096 c8 = _mm_avg_epu16(c8, cA); 4097 cC = _mm_avg_epu16(cC, cE); 4098 c0 = _mm_avg_epu16(c0, c4); 4099 c8 = _mm_avg_epu16(c8, cC); 4100 c0 = _mm_avg_epu16(c0, c8); 4101 4102 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4103 } 4104 4105 source0 += pitch; 4106 source1 += pitch; 4107 source2 += pitch; 4108 source3 += pitch; 4109 source4 += pitch; 4110 source5 += pitch; 4111 source6 += pitch; 4112 source7 += pitch; 4113 source8 += pitch; 4114 source9 += pitch; 4115 sourceA += pitch; 4116 sourceB += pitch; 4117 sourceC += pitch; 4118 sourceD += pitch; 4119 sourceE += pitch; 4120 sourceF += pitch; 4121 } 4122 } 4123 else ASSERT(false); 4124 } 4125 else 4126 { 4127 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4128 4129 if(internal.depth == 2) 4130 { 4131 for(int y = 0; y < height; y++) 4132 { 4133 for(int x = 0; x < width; x++) 4134 { 4135 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4136 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4137 4138 c0 = AVERAGE(c0, c1); 4139 4140 *(unsigned int*)(source0 + 4 * x) = c0; 4141 } 4142 4143 source0 += pitch; 4144 source1 += pitch; 4145 } 4146 } 4147 else if(internal.depth == 4) 4148 { 4149 for(int y = 0; y < height; y++) 4150 { 4151 for(int x = 0; x < width; x++) 4152 { 4153 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4154 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4155 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4156 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4157 4158 c0 = AVERAGE(c0, c1); 4159 c2 = AVERAGE(c2, c3); 4160 c0 = AVERAGE(c0, c2); 4161 4162 *(unsigned int*)(source0 + 4 * x) = c0; 4163 } 4164 4165 source0 += pitch; 4166 source1 += pitch; 4167 source2 += pitch; 4168 source3 += pitch; 4169 } 4170 } 4171 else if(internal.depth == 8) 4172 { 4173 for(int y = 0; y < height; y++) 4174 { 4175 for(int x = 0; x < width; x++) 4176 { 4177 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4178 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4179 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4180 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4181 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4182 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4183 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4184 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4185 4186 c0 = AVERAGE(c0, c1); 4187 c2 = AVERAGE(c2, c3); 4188 c4 = AVERAGE(c4, c5); 4189 c6 = AVERAGE(c6, c7); 4190 c0 = AVERAGE(c0, c2); 4191 c4 = AVERAGE(c4, c6); 4192 c0 = AVERAGE(c0, c4); 4193 4194 *(unsigned int*)(source0 + 4 * x) = c0; 4195 } 4196 4197 source0 += pitch; 4198 source1 += pitch; 4199 source2 += pitch; 4200 source3 += pitch; 4201 source4 += pitch; 4202 source5 += pitch; 4203 source6 += pitch; 4204 source7 += pitch; 4205 } 4206 } 4207 else if(internal.depth == 16) 4208 { 4209 for(int y = 0; y < height; y++) 4210 { 4211 for(int x = 0; x < width; x++) 4212 { 4213 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4214 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4215 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4216 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4217 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4218 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4219 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4220 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4221 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4222 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4223 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4224 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4225 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4226 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4227 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4228 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4229 4230 c0 = AVERAGE(c0, c1); 4231 c2 = AVERAGE(c2, c3); 4232 c4 = AVERAGE(c4, c5); 4233 c6 = AVERAGE(c6, c7); 4234 c8 = AVERAGE(c8, c9); 4235 cA = AVERAGE(cA, cB); 4236 cC = AVERAGE(cC, cD); 4237 cE = AVERAGE(cE, cF); 4238 c0 = AVERAGE(c0, c2); 4239 c4 = AVERAGE(c4, c6); 4240 c8 = AVERAGE(c8, cA); 4241 cC = AVERAGE(cC, cE); 4242 c0 = AVERAGE(c0, c4); 4243 c8 = AVERAGE(c8, cC); 4244 c0 = AVERAGE(c0, c8); 4245 4246 *(unsigned int*)(source0 + 4 * x) = c0; 4247 } 4248 4249 source0 += pitch; 4250 source1 += pitch; 4251 source2 += pitch; 4252 source3 += pitch; 4253 source4 += pitch; 4254 source5 += pitch; 4255 source6 += pitch; 4256 source7 += pitch; 4257 source8 += pitch; 4258 source9 += pitch; 4259 sourceA += pitch; 4260 sourceB += pitch; 4261 sourceC += pitch; 4262 sourceD += pitch; 4263 sourceE += pitch; 4264 sourceF += pitch; 4265 } 4266 } 4267 else ASSERT(false); 4268 4269 #undef AVERAGE 4270 } 4271 } 4272 else if(internal.format == FORMAT_A16B16G16R16) 4273 { 4274 if(CPUID::supportsSSE2() && (width % 2) == 0) 4275 { 4276 if(internal.depth == 2) 4277 { 4278 for(int y = 0; y < height; y++) 4279 { 4280 for(int x = 0; x < width; x += 2) 4281 { 4282 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4283 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4284 4285 c0 = _mm_avg_epu16(c0, c1); 4286 4287 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4288 } 4289 4290 source0 += pitch; 4291 source1 += pitch; 4292 } 4293 } 4294 else if(internal.depth == 4) 4295 { 4296 for(int y = 0; y < height; y++) 4297 { 4298 for(int x = 0; x < width; x += 2) 4299 { 4300 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4301 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4302 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4303 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4304 4305 c0 = _mm_avg_epu16(c0, c1); 4306 c2 = _mm_avg_epu16(c2, c3); 4307 c0 = _mm_avg_epu16(c0, c2); 4308 4309 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4310 } 4311 4312 source0 += pitch; 4313 source1 += pitch; 4314 source2 += pitch; 4315 source3 += pitch; 4316 } 4317 } 4318 else if(internal.depth == 8) 4319 { 4320 for(int y = 0; y < height; y++) 4321 { 4322 for(int x = 0; x < width; x += 2) 4323 { 4324 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4325 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4326 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4327 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4328 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4329 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4330 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4331 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4332 4333 c0 = _mm_avg_epu16(c0, c1); 4334 c2 = _mm_avg_epu16(c2, c3); 4335 c4 = _mm_avg_epu16(c4, c5); 4336 c6 = _mm_avg_epu16(c6, c7); 4337 c0 = _mm_avg_epu16(c0, c2); 4338 c4 = _mm_avg_epu16(c4, c6); 4339 c0 = _mm_avg_epu16(c0, c4); 4340 4341 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4342 } 4343 4344 source0 += pitch; 4345 source1 += pitch; 4346 source2 += pitch; 4347 source3 += pitch; 4348 source4 += pitch; 4349 source5 += pitch; 4350 source6 += pitch; 4351 source7 += pitch; 4352 } 4353 } 4354 else if(internal.depth == 16) 4355 { 4356 for(int y = 0; y < height; y++) 4357 { 4358 for(int x = 0; x < width; x += 2) 4359 { 4360 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4361 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4362 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4363 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4364 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4365 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4366 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4367 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4368 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4369 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4370 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4371 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4372 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4373 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4374 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4375 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4376 4377 c0 = _mm_avg_epu16(c0, c1); 4378 c2 = _mm_avg_epu16(c2, c3); 4379 c4 = _mm_avg_epu16(c4, c5); 4380 c6 = _mm_avg_epu16(c6, c7); 4381 c8 = _mm_avg_epu16(c8, c9); 4382 cA = _mm_avg_epu16(cA, cB); 4383 cC = _mm_avg_epu16(cC, cD); 4384 cE = _mm_avg_epu16(cE, cF); 4385 c0 = _mm_avg_epu16(c0, c2); 4386 c4 = _mm_avg_epu16(c4, c6); 4387 c8 = _mm_avg_epu16(c8, cA); 4388 cC = _mm_avg_epu16(cC, cE); 4389 c0 = _mm_avg_epu16(c0, c4); 4390 c8 = _mm_avg_epu16(c8, cC); 4391 c0 = _mm_avg_epu16(c0, c8); 4392 4393 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4394 } 4395 4396 source0 += pitch; 4397 source1 += pitch; 4398 source2 += pitch; 4399 source3 += pitch; 4400 source4 += pitch; 4401 source5 += pitch; 4402 source6 += pitch; 4403 source7 += pitch; 4404 source8 += pitch; 4405 source9 += pitch; 4406 sourceA += pitch; 4407 sourceB += pitch; 4408 sourceC += pitch; 4409 sourceD += pitch; 4410 sourceE += pitch; 4411 sourceF += pitch; 4412 } 4413 } 4414 else ASSERT(false); 4415 } 4416 else 4417 { 4418 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4419 4420 if(internal.depth == 2) 4421 { 4422 for(int y = 0; y < height; y++) 4423 { 4424 for(int x = 0; x < 2 * width; x++) 4425 { 4426 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4427 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4428 4429 c0 = AVERAGE(c0, c1); 4430 4431 *(unsigned int*)(source0 + 4 * x) = c0; 4432 } 4433 4434 source0 += pitch; 4435 source1 += pitch; 4436 } 4437 } 4438 else if(internal.depth == 4) 4439 { 4440 for(int y = 0; y < height; y++) 4441 { 4442 for(int x = 0; x < 2 * width; x++) 4443 { 4444 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4445 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4446 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4447 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4448 4449 c0 = AVERAGE(c0, c1); 4450 c2 = AVERAGE(c2, c3); 4451 c0 = AVERAGE(c0, c2); 4452 4453 *(unsigned int*)(source0 + 4 * x) = c0; 4454 } 4455 4456 source0 += pitch; 4457 source1 += pitch; 4458 source2 += pitch; 4459 source3 += pitch; 4460 } 4461 } 4462 else if(internal.depth == 8) 4463 { 4464 for(int y = 0; y < height; y++) 4465 { 4466 for(int x = 0; x < 2 * width; x++) 4467 { 4468 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4469 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4470 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4471 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4472 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4473 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4474 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4475 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4476 4477 c0 = AVERAGE(c0, c1); 4478 c2 = AVERAGE(c2, c3); 4479 c4 = AVERAGE(c4, c5); 4480 c6 = AVERAGE(c6, c7); 4481 c0 = AVERAGE(c0, c2); 4482 c4 = AVERAGE(c4, c6); 4483 c0 = AVERAGE(c0, c4); 4484 4485 *(unsigned int*)(source0 + 4 * x) = c0; 4486 } 4487 4488 source0 += pitch; 4489 source1 += pitch; 4490 source2 += pitch; 4491 source3 += pitch; 4492 source4 += pitch; 4493 source5 += pitch; 4494 source6 += pitch; 4495 source7 += pitch; 4496 } 4497 } 4498 else if(internal.depth == 16) 4499 { 4500 for(int y = 0; y < height; y++) 4501 { 4502 for(int x = 0; x < 2 * width; x++) 4503 { 4504 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4505 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4506 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4507 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4508 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4509 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4510 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4511 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4512 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4513 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4514 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4515 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4516 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4517 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4518 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4519 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4520 4521 c0 = AVERAGE(c0, c1); 4522 c2 = AVERAGE(c2, c3); 4523 c4 = AVERAGE(c4, c5); 4524 c6 = AVERAGE(c6, c7); 4525 c8 = AVERAGE(c8, c9); 4526 cA = AVERAGE(cA, cB); 4527 cC = AVERAGE(cC, cD); 4528 cE = AVERAGE(cE, cF); 4529 c0 = AVERAGE(c0, c2); 4530 c4 = AVERAGE(c4, c6); 4531 c8 = AVERAGE(c8, cA); 4532 cC = AVERAGE(cC, cE); 4533 c0 = AVERAGE(c0, c4); 4534 c8 = AVERAGE(c8, cC); 4535 c0 = AVERAGE(c0, c8); 4536 4537 *(unsigned int*)(source0 + 4 * x) = c0; 4538 } 4539 4540 source0 += pitch; 4541 source1 += pitch; 4542 source2 += pitch; 4543 source3 += pitch; 4544 source4 += pitch; 4545 source5 += pitch; 4546 source6 += pitch; 4547 source7 += pitch; 4548 source8 += pitch; 4549 source9 += pitch; 4550 sourceA += pitch; 4551 sourceB += pitch; 4552 sourceC += pitch; 4553 sourceD += pitch; 4554 sourceE += pitch; 4555 sourceF += pitch; 4556 } 4557 } 4558 else ASSERT(false); 4559 4560 #undef AVERAGE 4561 } 4562 } 4563 else if(internal.format == FORMAT_R32F) 4564 { 4565 if(CPUID::supportsSSE() && (width % 4) == 0) 4566 { 4567 if(internal.depth == 2) 4568 { 4569 for(int y = 0; y < height; y++) 4570 { 4571 for(int x = 0; x < width; x += 4) 4572 { 4573 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4574 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4575 4576 c0 = _mm_add_ps(c0, c1); 4577 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4578 4579 _mm_store_ps((float*)(source0 + 4 * x), c0); 4580 } 4581 4582 source0 += pitch; 4583 source1 += pitch; 4584 } 4585 } 4586 else if(internal.depth == 4) 4587 { 4588 for(int y = 0; y < height; y++) 4589 { 4590 for(int x = 0; x < width; x += 4) 4591 { 4592 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4593 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4594 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4595 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4596 4597 c0 = _mm_add_ps(c0, c1); 4598 c2 = _mm_add_ps(c2, c3); 4599 c0 = _mm_add_ps(c0, c2); 4600 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4601 4602 _mm_store_ps((float*)(source0 + 4 * x), c0); 4603 } 4604 4605 source0 += pitch; 4606 source1 += pitch; 4607 source2 += pitch; 4608 source3 += pitch; 4609 } 4610 } 4611 else if(internal.depth == 8) 4612 { 4613 for(int y = 0; y < height; y++) 4614 { 4615 for(int x = 0; x < width; x += 4) 4616 { 4617 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4618 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4619 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4620 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4621 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4622 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4623 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4624 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4625 4626 c0 = _mm_add_ps(c0, c1); 4627 c2 = _mm_add_ps(c2, c3); 4628 c4 = _mm_add_ps(c4, c5); 4629 c6 = _mm_add_ps(c6, c7); 4630 c0 = _mm_add_ps(c0, c2); 4631 c4 = _mm_add_ps(c4, c6); 4632 c0 = _mm_add_ps(c0, c4); 4633 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4634 4635 _mm_store_ps((float*)(source0 + 4 * x), c0); 4636 } 4637 4638 source0 += pitch; 4639 source1 += pitch; 4640 source2 += pitch; 4641 source3 += pitch; 4642 source4 += pitch; 4643 source5 += pitch; 4644 source6 += pitch; 4645 source7 += pitch; 4646 } 4647 } 4648 else if(internal.depth == 16) 4649 { 4650 for(int y = 0; y < height; y++) 4651 { 4652 for(int x = 0; x < width; x += 4) 4653 { 4654 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4655 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4656 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4657 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4658 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4659 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4660 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4661 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4662 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 4663 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 4664 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 4665 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 4666 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 4667 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 4668 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 4669 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 4670 4671 c0 = _mm_add_ps(c0, c1); 4672 c2 = _mm_add_ps(c2, c3); 4673 c4 = _mm_add_ps(c4, c5); 4674 c6 = _mm_add_ps(c6, c7); 4675 c8 = _mm_add_ps(c8, c9); 4676 cA = _mm_add_ps(cA, cB); 4677 cC = _mm_add_ps(cC, cD); 4678 cE = _mm_add_ps(cE, cF); 4679 c0 = _mm_add_ps(c0, c2); 4680 c4 = _mm_add_ps(c4, c6); 4681 c8 = _mm_add_ps(c8, cA); 4682 cC = _mm_add_ps(cC, cE); 4683 c0 = _mm_add_ps(c0, c4); 4684 c8 = _mm_add_ps(c8, cC); 4685 c0 = _mm_add_ps(c0, c8); 4686 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4687 4688 _mm_store_ps((float*)(source0 + 4 * x), c0); 4689 } 4690 4691 source0 += pitch; 4692 source1 += pitch; 4693 source2 += pitch; 4694 source3 += pitch; 4695 source4 += pitch; 4696 source5 += pitch; 4697 source6 += pitch; 4698 source7 += pitch; 4699 source8 += pitch; 4700 source9 += pitch; 4701 sourceA += pitch; 4702 sourceB += pitch; 4703 sourceC += pitch; 4704 sourceD += pitch; 4705 sourceE += pitch; 4706 sourceF += pitch; 4707 } 4708 } 4709 else ASSERT(false); 4710 } 4711 else 4712 { 4713 if(internal.depth == 2) 4714 { 4715 for(int y = 0; y < height; y++) 4716 { 4717 for(int x = 0; x < width; x++) 4718 { 4719 float c0 = *(float*)(source0 + 4 * x); 4720 float c1 = *(float*)(source1 + 4 * x); 4721 4722 c0 = c0 + c1; 4723 c0 *= 1.0f / 2.0f; 4724 4725 *(float*)(source0 + 4 * x) = c0; 4726 } 4727 4728 source0 += pitch; 4729 source1 += pitch; 4730 } 4731 } 4732 else if(internal.depth == 4) 4733 { 4734 for(int y = 0; y < height; y++) 4735 { 4736 for(int x = 0; x < width; x++) 4737 { 4738 float c0 = *(float*)(source0 + 4 * x); 4739 float c1 = *(float*)(source1 + 4 * x); 4740 float c2 = *(float*)(source2 + 4 * x); 4741 float c3 = *(float*)(source3 + 4 * x); 4742 4743 c0 = c0 + c1; 4744 c2 = c2 + c3; 4745 c0 = c0 + c2; 4746 c0 *= 1.0f / 4.0f; 4747 4748 *(float*)(source0 + 4 * x) = c0; 4749 } 4750 4751 source0 += pitch; 4752 source1 += pitch; 4753 source2 += pitch; 4754 source3 += pitch; 4755 } 4756 } 4757 else if(internal.depth == 8) 4758 { 4759 for(int y = 0; y < height; y++) 4760 { 4761 for(int x = 0; x < width; x++) 4762 { 4763 float c0 = *(float*)(source0 + 4 * x); 4764 float c1 = *(float*)(source1 + 4 * x); 4765 float c2 = *(float*)(source2 + 4 * x); 4766 float c3 = *(float*)(source3 + 4 * x); 4767 float c4 = *(float*)(source4 + 4 * x); 4768 float c5 = *(float*)(source5 + 4 * x); 4769 float c6 = *(float*)(source6 + 4 * x); 4770 float c7 = *(float*)(source7 + 4 * x); 4771 4772 c0 = c0 + c1; 4773 c2 = c2 + c3; 4774 c4 = c4 + c5; 4775 c6 = c6 + c7; 4776 c0 = c0 + c2; 4777 c4 = c4 + c6; 4778 c0 = c0 + c4; 4779 c0 *= 1.0f / 8.0f; 4780 4781 *(float*)(source0 + 4 * x) = c0; 4782 } 4783 4784 source0 += pitch; 4785 source1 += pitch; 4786 source2 += pitch; 4787 source3 += pitch; 4788 source4 += pitch; 4789 source5 += pitch; 4790 source6 += pitch; 4791 source7 += pitch; 4792 } 4793 } 4794 else if(internal.depth == 16) 4795 { 4796 for(int y = 0; y < height; y++) 4797 { 4798 for(int x = 0; x < width; x++) 4799 { 4800 float c0 = *(float*)(source0 + 4 * x); 4801 float c1 = *(float*)(source1 + 4 * x); 4802 float c2 = *(float*)(source2 + 4 * x); 4803 float c3 = *(float*)(source3 + 4 * x); 4804 float c4 = *(float*)(source4 + 4 * x); 4805 float c5 = *(float*)(source5 + 4 * x); 4806 float c6 = *(float*)(source6 + 4 * x); 4807 float c7 = *(float*)(source7 + 4 * x); 4808 float c8 = *(float*)(source8 + 4 * x); 4809 float c9 = *(float*)(source9 + 4 * x); 4810 float cA = *(float*)(sourceA + 4 * x); 4811 float cB = *(float*)(sourceB + 4 * x); 4812 float cC = *(float*)(sourceC + 4 * x); 4813 float cD = *(float*)(sourceD + 4 * x); 4814 float cE = *(float*)(sourceE + 4 * x); 4815 float cF = *(float*)(sourceF + 4 * x); 4816 4817 c0 = c0 + c1; 4818 c2 = c2 + c3; 4819 c4 = c4 + c5; 4820 c6 = c6 + c7; 4821 c8 = c8 + c9; 4822 cA = cA + cB; 4823 cC = cC + cD; 4824 cE = cE + cF; 4825 c0 = c0 + c2; 4826 c4 = c4 + c6; 4827 c8 = c8 + cA; 4828 cC = cC + cE; 4829 c0 = c0 + c4; 4830 c8 = c8 + cC; 4831 c0 = c0 + c8; 4832 c0 *= 1.0f / 16.0f; 4833 4834 *(float*)(source0 + 4 * x) = c0; 4835 } 4836 4837 source0 += pitch; 4838 source1 += pitch; 4839 source2 += pitch; 4840 source3 += pitch; 4841 source4 += pitch; 4842 source5 += pitch; 4843 source6 += pitch; 4844 source7 += pitch; 4845 source8 += pitch; 4846 source9 += pitch; 4847 sourceA += pitch; 4848 sourceB += pitch; 4849 sourceC += pitch; 4850 sourceD += pitch; 4851 sourceE += pitch; 4852 sourceF += pitch; 4853 } 4854 } 4855 else ASSERT(false); 4856 } 4857 } 4858 else if(internal.format == FORMAT_G32R32F) 4859 { 4860 if(CPUID::supportsSSE() && (width % 2) == 0) 4861 { 4862 if(internal.depth == 2) 4863 { 4864 for(int y = 0; y < height; y++) 4865 { 4866 for(int x = 0; x < width; x += 2) 4867 { 4868 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4869 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4870 4871 c0 = _mm_add_ps(c0, c1); 4872 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4873 4874 _mm_store_ps((float*)(source0 + 8 * x), c0); 4875 } 4876 4877 source0 += pitch; 4878 source1 += pitch; 4879 } 4880 } 4881 else if(internal.depth == 4) 4882 { 4883 for(int y = 0; y < height; y++) 4884 { 4885 for(int x = 0; x < width; x += 2) 4886 { 4887 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4888 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4889 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4890 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4891 4892 c0 = _mm_add_ps(c0, c1); 4893 c2 = _mm_add_ps(c2, c3); 4894 c0 = _mm_add_ps(c0, c2); 4895 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4896 4897 _mm_store_ps((float*)(source0 + 8 * x), c0); 4898 } 4899 4900 source0 += pitch; 4901 source1 += pitch; 4902 source2 += pitch; 4903 source3 += pitch; 4904 } 4905 } 4906 else if(internal.depth == 8) 4907 { 4908 for(int y = 0; y < height; y++) 4909 { 4910 for(int x = 0; x < width; x += 2) 4911 { 4912 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4913 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4914 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4915 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4916 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4917 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4918 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4919 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 4920 4921 c0 = _mm_add_ps(c0, c1); 4922 c2 = _mm_add_ps(c2, c3); 4923 c4 = _mm_add_ps(c4, c5); 4924 c6 = _mm_add_ps(c6, c7); 4925 c0 = _mm_add_ps(c0, c2); 4926 c4 = _mm_add_ps(c4, c6); 4927 c0 = _mm_add_ps(c0, c4); 4928 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4929 4930 _mm_store_ps((float*)(source0 + 8 * x), c0); 4931 } 4932 4933 source0 += pitch; 4934 source1 += pitch; 4935 source2 += pitch; 4936 source3 += pitch; 4937 source4 += pitch; 4938 source5 += pitch; 4939 source6 += pitch; 4940 source7 += pitch; 4941 } 4942 } 4943 else if(internal.depth == 16) 4944 { 4945 for(int y = 0; y < height; y++) 4946 { 4947 for(int x = 0; x < width; x += 2) 4948 { 4949 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4950 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4951 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4952 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4953 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4954 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4955 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4956 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 4957 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 4958 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 4959 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 4960 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 4961 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 4962 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 4963 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 4964 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 4965 4966 c0 = _mm_add_ps(c0, c1); 4967 c2 = _mm_add_ps(c2, c3); 4968 c4 = _mm_add_ps(c4, c5); 4969 c6 = _mm_add_ps(c6, c7); 4970 c8 = _mm_add_ps(c8, c9); 4971 cA = _mm_add_ps(cA, cB); 4972 cC = _mm_add_ps(cC, cD); 4973 cE = _mm_add_ps(cE, cF); 4974 c0 = _mm_add_ps(c0, c2); 4975 c4 = _mm_add_ps(c4, c6); 4976 c8 = _mm_add_ps(c8, cA); 4977 cC = _mm_add_ps(cC, cE); 4978 c0 = _mm_add_ps(c0, c4); 4979 c8 = _mm_add_ps(c8, cC); 4980 c0 = _mm_add_ps(c0, c8); 4981 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4982 4983 _mm_store_ps((float*)(source0 + 8 * x), c0); 4984 } 4985 4986 source0 += pitch; 4987 source1 += pitch; 4988 source2 += pitch; 4989 source3 += pitch; 4990 source4 += pitch; 4991 source5 += pitch; 4992 source6 += pitch; 4993 source7 += pitch; 4994 source8 += pitch; 4995 source9 += pitch; 4996 sourceA += pitch; 4997 sourceB += pitch; 4998 sourceC += pitch; 4999 sourceD += pitch; 5000 sourceE += pitch; 5001 sourceF += pitch; 5002 } 5003 } 5004 else ASSERT(false); 5005 } 5006 else 5007 { 5008 if(internal.depth == 2) 5009 { 5010 for(int y = 0; y < height; y++) 5011 { 5012 for(int x = 0; x < 2 * width; x++) 5013 { 5014 float c0 = *(float*)(source0 + 4 * x); 5015 float c1 = *(float*)(source1 + 4 * x); 5016 5017 c0 = c0 + c1; 5018 c0 *= 1.0f / 2.0f; 5019 5020 *(float*)(source0 + 4 * x) = c0; 5021 } 5022 5023 source0 += pitch; 5024 source1 += pitch; 5025 } 5026 } 5027 else if(internal.depth == 4) 5028 { 5029 for(int y = 0; y < height; y++) 5030 { 5031 for(int x = 0; x < 2 * width; x++) 5032 { 5033 float c0 = *(float*)(source0 + 4 * x); 5034 float c1 = *(float*)(source1 + 4 * x); 5035 float c2 = *(float*)(source2 + 4 * x); 5036 float c3 = *(float*)(source3 + 4 * x); 5037 5038 c0 = c0 + c1; 5039 c2 = c2 + c3; 5040 c0 = c0 + c2; 5041 c0 *= 1.0f / 4.0f; 5042 5043 *(float*)(source0 + 4 * x) = c0; 5044 } 5045 5046 source0 += pitch; 5047 source1 += pitch; 5048 source2 += pitch; 5049 source3 += pitch; 5050 } 5051 } 5052 else if(internal.depth == 8) 5053 { 5054 for(int y = 0; y < height; y++) 5055 { 5056 for(int x = 0; x < 2 * width; x++) 5057 { 5058 float c0 = *(float*)(source0 + 4 * x); 5059 float c1 = *(float*)(source1 + 4 * x); 5060 float c2 = *(float*)(source2 + 4 * x); 5061 float c3 = *(float*)(source3 + 4 * x); 5062 float c4 = *(float*)(source4 + 4 * x); 5063 float c5 = *(float*)(source5 + 4 * x); 5064 float c6 = *(float*)(source6 + 4 * x); 5065 float c7 = *(float*)(source7 + 4 * x); 5066 5067 c0 = c0 + c1; 5068 c2 = c2 + c3; 5069 c4 = c4 + c5; 5070 c6 = c6 + c7; 5071 c0 = c0 + c2; 5072 c4 = c4 + c6; 5073 c0 = c0 + c4; 5074 c0 *= 1.0f / 8.0f; 5075 5076 *(float*)(source0 + 4 * x) = c0; 5077 } 5078 5079 source0 += pitch; 5080 source1 += pitch; 5081 source2 += pitch; 5082 source3 += pitch; 5083 source4 += pitch; 5084 source5 += pitch; 5085 source6 += pitch; 5086 source7 += pitch; 5087 } 5088 } 5089 else if(internal.depth == 16) 5090 { 5091 for(int y = 0; y < height; y++) 5092 { 5093 for(int x = 0; x < 2 * width; x++) 5094 { 5095 float c0 = *(float*)(source0 + 4 * x); 5096 float c1 = *(float*)(source1 + 4 * x); 5097 float c2 = *(float*)(source2 + 4 * x); 5098 float c3 = *(float*)(source3 + 4 * x); 5099 float c4 = *(float*)(source4 + 4 * x); 5100 float c5 = *(float*)(source5 + 4 * x); 5101 float c6 = *(float*)(source6 + 4 * x); 5102 float c7 = *(float*)(source7 + 4 * x); 5103 float c8 = *(float*)(source8 + 4 * x); 5104 float c9 = *(float*)(source9 + 4 * x); 5105 float cA = *(float*)(sourceA + 4 * x); 5106 float cB = *(float*)(sourceB + 4 * x); 5107 float cC = *(float*)(sourceC + 4 * x); 5108 float cD = *(float*)(sourceD + 4 * x); 5109 float cE = *(float*)(sourceE + 4 * x); 5110 float cF = *(float*)(sourceF + 4 * x); 5111 5112 c0 = c0 + c1; 5113 c2 = c2 + c3; 5114 c4 = c4 + c5; 5115 c6 = c6 + c7; 5116 c8 = c8 + c9; 5117 cA = cA + cB; 5118 cC = cC + cD; 5119 cE = cE + cF; 5120 c0 = c0 + c2; 5121 c4 = c4 + c6; 5122 c8 = c8 + cA; 5123 cC = cC + cE; 5124 c0 = c0 + c4; 5125 c8 = c8 + cC; 5126 c0 = c0 + c8; 5127 c0 *= 1.0f / 16.0f; 5128 5129 *(float*)(source0 + 4 * x) = c0; 5130 } 5131 5132 source0 += pitch; 5133 source1 += pitch; 5134 source2 += pitch; 5135 source3 += pitch; 5136 source4 += pitch; 5137 source5 += pitch; 5138 source6 += pitch; 5139 source7 += pitch; 5140 source8 += pitch; 5141 source9 += pitch; 5142 sourceA += pitch; 5143 sourceB += pitch; 5144 sourceC += pitch; 5145 sourceD += pitch; 5146 sourceE += pitch; 5147 sourceF += pitch; 5148 } 5149 } 5150 else ASSERT(false); 5151 } 5152 } 5153 else if(internal.format == FORMAT_A32B32G32R32F || internal.format == FORMAT_X32B32G32R32F) 5154 { 5155 if(CPUID::supportsSSE()) 5156 { 5157 if(internal.depth == 2) 5158 { 5159 for(int y = 0; y < height; y++) 5160 { 5161 for(int x = 0; x < width; x++) 5162 { 5163 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5164 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5165 5166 c0 = _mm_add_ps(c0, c1); 5167 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5168 5169 _mm_store_ps((float*)(source0 + 16 * x), c0); 5170 } 5171 5172 source0 += pitch; 5173 source1 += pitch; 5174 } 5175 } 5176 else if(internal.depth == 4) 5177 { 5178 for(int y = 0; y < height; y++) 5179 { 5180 for(int x = 0; x < width; x++) 5181 { 5182 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5183 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5184 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5185 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5186 5187 c0 = _mm_add_ps(c0, c1); 5188 c2 = _mm_add_ps(c2, c3); 5189 c0 = _mm_add_ps(c0, c2); 5190 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5191 5192 _mm_store_ps((float*)(source0 + 16 * x), c0); 5193 } 5194 5195 source0 += pitch; 5196 source1 += pitch; 5197 source2 += pitch; 5198 source3 += pitch; 5199 } 5200 } 5201 else if(internal.depth == 8) 5202 { 5203 for(int y = 0; y < height; y++) 5204 { 5205 for(int x = 0; x < width; x++) 5206 { 5207 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5208 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5209 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5210 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5211 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5212 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5213 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5214 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5215 5216 c0 = _mm_add_ps(c0, c1); 5217 c2 = _mm_add_ps(c2, c3); 5218 c4 = _mm_add_ps(c4, c5); 5219 c6 = _mm_add_ps(c6, c7); 5220 c0 = _mm_add_ps(c0, c2); 5221 c4 = _mm_add_ps(c4, c6); 5222 c0 = _mm_add_ps(c0, c4); 5223 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5224 5225 _mm_store_ps((float*)(source0 + 16 * x), c0); 5226 } 5227 5228 source0 += pitch; 5229 source1 += pitch; 5230 source2 += pitch; 5231 source3 += pitch; 5232 source4 += pitch; 5233 source5 += pitch; 5234 source6 += pitch; 5235 source7 += pitch; 5236 } 5237 } 5238 else if(internal.depth == 16) 5239 { 5240 for(int y = 0; y < height; y++) 5241 { 5242 for(int x = 0; x < width; x++) 5243 { 5244 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5245 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5246 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5247 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5248 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5249 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5250 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5251 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5252 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5253 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5254 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5255 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x)); 5256 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x)); 5257 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x)); 5258 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x)); 5259 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x)); 5260 5261 c0 = _mm_add_ps(c0, c1); 5262 c2 = _mm_add_ps(c2, c3); 5263 c4 = _mm_add_ps(c4, c5); 5264 c6 = _mm_add_ps(c6, c7); 5265 c8 = _mm_add_ps(c8, c9); 5266 cA = _mm_add_ps(cA, cB); 5267 cC = _mm_add_ps(cC, cD); 5268 cE = _mm_add_ps(cE, cF); 5269 c0 = _mm_add_ps(c0, c2); 5270 c4 = _mm_add_ps(c4, c6); 5271 c8 = _mm_add_ps(c8, cA); 5272 cC = _mm_add_ps(cC, cE); 5273 c0 = _mm_add_ps(c0, c4); 5274 c8 = _mm_add_ps(c8, cC); 5275 c0 = _mm_add_ps(c0, c8); 5276 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 5277 5278 _mm_store_ps((float*)(source0 + 16 * x), c0); 5279 } 5280 5281 source0 += pitch; 5282 source1 += pitch; 5283 source2 += pitch; 5284 source3 += pitch; 5285 source4 += pitch; 5286 source5 += pitch; 5287 source6 += pitch; 5288 source7 += pitch; 5289 source8 += pitch; 5290 source9 += pitch; 5291 sourceA += pitch; 5292 sourceB += pitch; 5293 sourceC += pitch; 5294 sourceD += pitch; 5295 sourceE += pitch; 5296 sourceF += pitch; 5297 } 5298 } 5299 else ASSERT(false); 5300 } 5301 else 5302 { 5303 if(internal.depth == 2) 5304 { 5305 for(int y = 0; y < height; y++) 5306 { 5307 for(int x = 0; x < 4 * width; x++) 5308 { 5309 float c0 = *(float*)(source0 + 4 * x); 5310 float c1 = *(float*)(source1 + 4 * x); 5311 5312 c0 = c0 + c1; 5313 c0 *= 1.0f / 2.0f; 5314 5315 *(float*)(source0 + 4 * x) = c0; 5316 } 5317 5318 source0 += pitch; 5319 source1 += pitch; 5320 } 5321 } 5322 else if(internal.depth == 4) 5323 { 5324 for(int y = 0; y < height; y++) 5325 { 5326 for(int x = 0; x < 4 * width; x++) 5327 { 5328 float c0 = *(float*)(source0 + 4 * x); 5329 float c1 = *(float*)(source1 + 4 * x); 5330 float c2 = *(float*)(source2 + 4 * x); 5331 float c3 = *(float*)(source3 + 4 * x); 5332 5333 c0 = c0 + c1; 5334 c2 = c2 + c3; 5335 c0 = c0 + c2; 5336 c0 *= 1.0f / 4.0f; 5337 5338 *(float*)(source0 + 4 * x) = c0; 5339 } 5340 5341 source0 += pitch; 5342 source1 += pitch; 5343 source2 += pitch; 5344 source3 += pitch; 5345 } 5346 } 5347 else if(internal.depth == 8) 5348 { 5349 for(int y = 0; y < height; y++) 5350 { 5351 for(int x = 0; x < 4 * width; x++) 5352 { 5353 float c0 = *(float*)(source0 + 4 * x); 5354 float c1 = *(float*)(source1 + 4 * x); 5355 float c2 = *(float*)(source2 + 4 * x); 5356 float c3 = *(float*)(source3 + 4 * x); 5357 float c4 = *(float*)(source4 + 4 * x); 5358 float c5 = *(float*)(source5 + 4 * x); 5359 float c6 = *(float*)(source6 + 4 * x); 5360 float c7 = *(float*)(source7 + 4 * x); 5361 5362 c0 = c0 + c1; 5363 c2 = c2 + c3; 5364 c4 = c4 + c5; 5365 c6 = c6 + c7; 5366 c0 = c0 + c2; 5367 c4 = c4 + c6; 5368 c0 = c0 + c4; 5369 c0 *= 1.0f / 8.0f; 5370 5371 *(float*)(source0 + 4 * x) = c0; 5372 } 5373 5374 source0 += pitch; 5375 source1 += pitch; 5376 source2 += pitch; 5377 source3 += pitch; 5378 source4 += pitch; 5379 source5 += pitch; 5380 source6 += pitch; 5381 source7 += pitch; 5382 } 5383 } 5384 else if(internal.depth == 16) 5385 { 5386 for(int y = 0; y < height; y++) 5387 { 5388 for(int x = 0; x < 4 * width; x++) 5389 { 5390 float c0 = *(float*)(source0 + 4 * x); 5391 float c1 = *(float*)(source1 + 4 * x); 5392 float c2 = *(float*)(source2 + 4 * x); 5393 float c3 = *(float*)(source3 + 4 * x); 5394 float c4 = *(float*)(source4 + 4 * x); 5395 float c5 = *(float*)(source5 + 4 * x); 5396 float c6 = *(float*)(source6 + 4 * x); 5397 float c7 = *(float*)(source7 + 4 * x); 5398 float c8 = *(float*)(source8 + 4 * x); 5399 float c9 = *(float*)(source9 + 4 * x); 5400 float cA = *(float*)(sourceA + 4 * x); 5401 float cB = *(float*)(sourceB + 4 * x); 5402 float cC = *(float*)(sourceC + 4 * x); 5403 float cD = *(float*)(sourceD + 4 * x); 5404 float cE = *(float*)(sourceE + 4 * x); 5405 float cF = *(float*)(sourceF + 4 * x); 5406 5407 c0 = c0 + c1; 5408 c2 = c2 + c3; 5409 c4 = c4 + c5; 5410 c6 = c6 + c7; 5411 c8 = c8 + c9; 5412 cA = cA + cB; 5413 cC = cC + cD; 5414 cE = cE + cF; 5415 c0 = c0 + c2; 5416 c4 = c4 + c6; 5417 c8 = c8 + cA; 5418 cC = cC + cE; 5419 c0 = c0 + c4; 5420 c8 = c8 + cC; 5421 c0 = c0 + c8; 5422 c0 *= 1.0f / 16.0f; 5423 5424 *(float*)(source0 + 4 * x) = c0; 5425 } 5426 5427 source0 += pitch; 5428 source1 += pitch; 5429 source2 += pitch; 5430 source3 += pitch; 5431 source4 += pitch; 5432 source5 += pitch; 5433 source6 += pitch; 5434 source7 += pitch; 5435 source8 += pitch; 5436 source9 += pitch; 5437 sourceA += pitch; 5438 sourceB += pitch; 5439 sourceC += pitch; 5440 sourceD += pitch; 5441 sourceE += pitch; 5442 sourceF += pitch; 5443 } 5444 } 5445 else ASSERT(false); 5446 } 5447 } 5448 else if(internal.format == FORMAT_R5G6B5) 5449 { 5450 if(CPUID::supportsSSE2() && (width % 8) == 0) 5451 { 5452 if(internal.depth == 2) 5453 { 5454 for(int y = 0; y < height; y++) 5455 { 5456 for(int x = 0; x < width; x += 8) 5457 { 5458 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5459 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5460 5461 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5462 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5463 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5464 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5465 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5466 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5467 5468 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5469 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5470 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5471 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5472 c0 = _mm_or_si128(c0, c1); 5473 5474 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5475 } 5476 5477 source0 += pitch; 5478 source1 += pitch; 5479 } 5480 } 5481 else if(internal.depth == 4) 5482 { 5483 for(int y = 0; y < height; y++) 5484 { 5485 for(int x = 0; x < width; x += 8) 5486 { 5487 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5488 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5489 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5490 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5491 5492 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5493 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5494 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5495 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5496 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5497 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5498 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5499 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5500 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5501 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5502 5503 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5504 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5505 c0 = _mm_avg_epu8(c0, c2); 5506 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5507 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5508 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5509 c1 = _mm_avg_epu16(c1, c3); 5510 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5511 c0 = _mm_or_si128(c0, c1); 5512 5513 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5514 } 5515 5516 source0 += pitch; 5517 source1 += pitch; 5518 source2 += pitch; 5519 source3 += pitch; 5520 } 5521 } 5522 else if(internal.depth == 8) 5523 { 5524 for(int y = 0; y < height; y++) 5525 { 5526 for(int x = 0; x < width; x += 8) 5527 { 5528 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5529 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5530 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5531 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5532 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5533 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5534 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5535 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5536 5537 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5538 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5539 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5540 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5541 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5542 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5543 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5544 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5545 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5546 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5547 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5548 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5549 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5550 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5551 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5552 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5553 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5554 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5555 5556 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5557 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5558 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5559 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5560 c0 = _mm_avg_epu8(c0, c2); 5561 c4 = _mm_avg_epu8(c4, c6); 5562 c0 = _mm_avg_epu8(c0, c4); 5563 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5564 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5565 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5566 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5567 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5568 c1 = _mm_avg_epu16(c1, c3); 5569 c5 = _mm_avg_epu16(c5, c7); 5570 c1 = _mm_avg_epu16(c1, c5); 5571 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5572 c0 = _mm_or_si128(c0, c1); 5573 5574 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5575 } 5576 5577 source0 += pitch; 5578 source1 += pitch; 5579 source2 += pitch; 5580 source3 += pitch; 5581 source4 += pitch; 5582 source5 += pitch; 5583 source6 += pitch; 5584 source7 += pitch; 5585 } 5586 } 5587 else if(internal.depth == 16) 5588 { 5589 for(int y = 0; y < height; y++) 5590 { 5591 for(int x = 0; x < width; x += 8) 5592 { 5593 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5594 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5595 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5596 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5597 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5598 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5599 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5600 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5601 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x)); 5602 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x)); 5603 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x)); 5604 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x)); 5605 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x)); 5606 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x)); 5607 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x)); 5608 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x)); 5609 5610 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5611 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5612 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5613 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5614 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5615 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5616 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5617 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5618 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5619 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5620 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5621 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5622 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5623 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5624 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5625 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5626 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5627 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5628 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b)); 5629 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_)); 5630 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b)); 5631 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_)); 5632 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b)); 5633 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_)); 5634 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b)); 5635 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_)); 5636 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b)); 5637 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_)); 5638 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b)); 5639 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_)); 5640 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b)); 5641 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_)); 5642 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b)); 5643 __m128i cF__g_ = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_)); 5644 5645 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5646 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5647 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5648 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5649 c8 = _mm_avg_epu8(c8_r_b, c9_r_b); 5650 cA = _mm_avg_epu8(cA_r_b, cB_r_b); 5651 cC = _mm_avg_epu8(cC_r_b, cD_r_b); 5652 cE = _mm_avg_epu8(cE_r_b, cF_r_b); 5653 c0 = _mm_avg_epu8(c0, c2); 5654 c4 = _mm_avg_epu8(c4, c6); 5655 c8 = _mm_avg_epu8(c8, cA); 5656 cC = _mm_avg_epu8(cC, cE); 5657 c0 = _mm_avg_epu8(c0, c4); 5658 c8 = _mm_avg_epu8(c8, cC); 5659 c0 = _mm_avg_epu8(c0, c8); 5660 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5661 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5662 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5663 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5664 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5665 c9 = _mm_avg_epu16(c8__g_, c9__g_); 5666 cB = _mm_avg_epu16(cA__g_, cB__g_); 5667 cD = _mm_avg_epu16(cC__g_, cD__g_); 5668 cF = _mm_avg_epu16(cE__g_, cF__g_); 5669 c1 = _mm_avg_epu8(c1, c3); 5670 c5 = _mm_avg_epu8(c5, c7); 5671 c9 = _mm_avg_epu8(c9, cB); 5672 cD = _mm_avg_epu8(cD, cF); 5673 c1 = _mm_avg_epu8(c1, c5); 5674 c9 = _mm_avg_epu8(c9, cD); 5675 c1 = _mm_avg_epu8(c1, c9); 5676 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5677 c0 = _mm_or_si128(c0, c1); 5678 5679 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5680 } 5681 5682 source0 += pitch; 5683 source1 += pitch; 5684 source2 += pitch; 5685 source3 += pitch; 5686 source4 += pitch; 5687 source5 += pitch; 5688 source6 += pitch; 5689 source7 += pitch; 5690 source8 += pitch; 5691 source9 += pitch; 5692 sourceA += pitch; 5693 sourceB += pitch; 5694 sourceC += pitch; 5695 sourceD += pitch; 5696 sourceE += pitch; 5697 sourceF += pitch; 5698 } 5699 } 5700 else ASSERT(false); 5701 } 5702 else 5703 { 5704 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821)) 5705 5706 if(internal.depth == 2) 5707 { 5708 for(int y = 0; y < height; y++) 5709 { 5710 for(int x = 0; x < width; x++) 5711 { 5712 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5713 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5714 5715 c0 = AVERAGE(c0, c1); 5716 5717 *(unsigned short*)(source0 + 2 * x) = c0; 5718 } 5719 5720 source0 += pitch; 5721 source1 += pitch; 5722 } 5723 } 5724 else if(internal.depth == 4) 5725 { 5726 for(int y = 0; y < height; y++) 5727 { 5728 for(int x = 0; x < width; x++) 5729 { 5730 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5731 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5732 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5733 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5734 5735 c0 = AVERAGE(c0, c1); 5736 c2 = AVERAGE(c2, c3); 5737 c0 = AVERAGE(c0, c2); 5738 5739 *(unsigned short*)(source0 + 2 * x) = c0; 5740 } 5741 5742 source0 += pitch; 5743 source1 += pitch; 5744 source2 += pitch; 5745 source3 += pitch; 5746 } 5747 } 5748 else if(internal.depth == 8) 5749 { 5750 for(int y = 0; y < height; y++) 5751 { 5752 for(int x = 0; x < width; x++) 5753 { 5754 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5755 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5756 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5757 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5758 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5759 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5760 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5761 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5762 5763 c0 = AVERAGE(c0, c1); 5764 c2 = AVERAGE(c2, c3); 5765 c4 = AVERAGE(c4, c5); 5766 c6 = AVERAGE(c6, c7); 5767 c0 = AVERAGE(c0, c2); 5768 c4 = AVERAGE(c4, c6); 5769 c0 = AVERAGE(c0, c4); 5770 5771 *(unsigned short*)(source0 + 2 * x) = c0; 5772 } 5773 5774 source0 += pitch; 5775 source1 += pitch; 5776 source2 += pitch; 5777 source3 += pitch; 5778 source4 += pitch; 5779 source5 += pitch; 5780 source6 += pitch; 5781 source7 += pitch; 5782 } 5783 } 5784 else if(internal.depth == 16) 5785 { 5786 for(int y = 0; y < height; y++) 5787 { 5788 for(int x = 0; x < width; x++) 5789 { 5790 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5791 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5792 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5793 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5794 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5795 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5796 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5797 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5798 unsigned short c8 = *(unsigned short*)(source8 + 2 * x); 5799 unsigned short c9 = *(unsigned short*)(source9 + 2 * x); 5800 unsigned short cA = *(unsigned short*)(sourceA + 2 * x); 5801 unsigned short cB = *(unsigned short*)(sourceB + 2 * x); 5802 unsigned short cC = *(unsigned short*)(sourceC + 2 * x); 5803 unsigned short cD = *(unsigned short*)(sourceD + 2 * x); 5804 unsigned short cE = *(unsigned short*)(sourceE + 2 * x); 5805 unsigned short cF = *(unsigned short*)(sourceF + 2 * x); 5806 5807 c0 = AVERAGE(c0, c1); 5808 c2 = AVERAGE(c2, c3); 5809 c4 = AVERAGE(c4, c5); 5810 c6 = AVERAGE(c6, c7); 5811 c8 = AVERAGE(c8, c9); 5812 cA = AVERAGE(cA, cB); 5813 cC = AVERAGE(cC, cD); 5814 cE = AVERAGE(cE, cF); 5815 c0 = AVERAGE(c0, c2); 5816 c4 = AVERAGE(c4, c6); 5817 c8 = AVERAGE(c8, cA); 5818 cC = AVERAGE(cC, cE); 5819 c0 = AVERAGE(c0, c4); 5820 c8 = AVERAGE(c8, cC); 5821 c0 = AVERAGE(c0, c8); 5822 5823 *(unsigned short*)(source0 + 2 * x) = c0; 5824 } 5825 5826 source0 += pitch; 5827 source1 += pitch; 5828 source2 += pitch; 5829 source3 += pitch; 5830 source4 += pitch; 5831 source5 += pitch; 5832 source6 += pitch; 5833 source7 += pitch; 5834 source8 += pitch; 5835 source9 += pitch; 5836 sourceA += pitch; 5837 sourceB += pitch; 5838 sourceC += pitch; 5839 sourceD += pitch; 5840 sourceE += pitch; 5841 sourceF += pitch; 5842 } 5843 } 5844 else ASSERT(false); 5845 5846 #undef AVERAGE 5847 } 5848 } 5849 else 5850 { 5851 // UNIMPLEMENTED(); 5852 } 5853 } 5854} 5855