Surface.cpp revision 43577b8cc676a157ceab055ead33a441c23b2cf5
1// SwiftShader Software Renderer 2// 3// Copyright(c) 2005-2013 TransGaming Inc. 4// 5// All rights reserved. No part of this software may be copied, distributed, transmitted, 6// transcribed, stored in a retrieval system, translated into any human or computer 7// language by any means, or disclosed to third parties without the explicit written 8// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express 9// or implied, including but not limited to any patent rights, are granted to you. 10// 11 12#include "Surface.hpp" 13 14#include "Color.hpp" 15#include "Context.hpp" 16#include "ETC_Decoder.hpp" 17#include "Renderer.hpp" 18#include "Common/Half.hpp" 19#include "Common/Memory.hpp" 20#include "Common/CPUID.hpp" 21#include "Common/Resource.hpp" 22#include "Common/Debug.hpp" 23#include "Reactor/Reactor.hpp" 24 25#include <xmmintrin.h> 26#include <emmintrin.h> 27 28#undef min 29#undef max 30 31namespace sw 32{ 33 extern bool quadLayoutEnabled; 34 extern bool complementaryDepthBuffer; 35 extern TranscendentalPrecision logPrecision; 36 37 unsigned int *Surface::palette = 0; 38 unsigned int Surface::paletteID = 0; 39 40 void Rect::clip(int minX, int minY, int maxX, int maxY) 41 { 42 x0 = clamp(x0, minX, maxX); 43 y0 = clamp(y0, minY, maxY); 44 x1 = clamp(x1, minX, maxX); 45 y1 = clamp(y1, minY, maxY); 46 } 47 48 void Surface::Buffer::write(int x, int y, int z, const Color<float> &color) 49 { 50 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 51 52 write(element, color); 53 } 54 55 void Surface::Buffer::write(int x, int y, const Color<float> &color) 56 { 57 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 58 59 write(element, color); 60 } 61 62 inline void Surface::Buffer::write(void *element, const Color<float> &color) 63 { 64 switch(format) 65 { 66 case FORMAT_A8: 67 *(unsigned char*)element = unorm<8>(color.a); 68 break; 69 case FORMAT_R8: 70 *(unsigned char*)element = unorm<8>(color.r); 71 
break; 72 case FORMAT_R3G3B2: 73 *(unsigned char*)element = (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 74 break; 75 case FORMAT_A8R3G3B2: 76 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<3>(color.r) << 5) | (unorm<3>(color.g) << 2) | (unorm<2>(color.b) << 0); 77 break; 78 case FORMAT_X4R4G4B4: 79 *(unsigned short*)element = 0xF000 | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 80 break; 81 case FORMAT_A4R4G4B4: 82 *(unsigned short*)element = (unorm<4>(color.a) << 12) | (unorm<4>(color.r) << 8) | (unorm<4>(color.g) << 4) | (unorm<4>(color.b) << 0); 83 break; 84 case FORMAT_R4G4B4A4: 85 *(unsigned short*)element = (unorm<4>(color.r) << 12) | (unorm<4>(color.g) << 8) | (unorm<4>(color.b) << 4) | (unorm<4>(color.a) << 0); 86 break; 87 case FORMAT_R5G6B5: 88 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<6>(color.g) << 5) | (unorm<5>(color.b) << 0); 89 break; 90 case FORMAT_A1R5G5B5: 91 *(unsigned short*)element = (unorm<1>(color.a) << 15) | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 92 break; 93 case FORMAT_R5G5B5A1: 94 *(unsigned short*)element = (unorm<5>(color.r) << 11) | (unorm<5>(color.g) << 6) | (unorm<5>(color.b) << 1) | (unorm<5>(color.a) << 0); 95 break; 96 case FORMAT_X1R5G5B5: 97 *(unsigned short*)element = 0x8000 | (unorm<5>(color.r) << 10) | (unorm<5>(color.g) << 5) | (unorm<5>(color.b) << 0); 98 break; 99 case FORMAT_A8R8G8B8: 100 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 101 break; 102 case FORMAT_X8R8G8B8: 103 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.r) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.b) << 0); 104 break; 105 case FORMAT_A8B8G8R8: 106 *(unsigned int*)element = (unorm<8>(color.a) << 24) | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 107 break; 108 case 
FORMAT_X8B8G8R8: 109 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 110 break; 111 case FORMAT_A2R10G10B10: 112 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.r) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.b) << 0); 113 break; 114 case FORMAT_A2B10G10R10: 115 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (unorm<10>(color.b) << 20) | (unorm<10>(color.g) << 10) | (unorm<10>(color.r) << 0); 116 break; 117 case FORMAT_G8R8: 118 *(unsigned int*)element = (unorm<8>(color.g) << 8) | (unorm<8>(color.r) << 0); 119 break; 120 case FORMAT_G16R16: 121 *(unsigned int*)element = (unorm<16>(color.g) << 16) | (unorm<16>(color.r) << 0); 122 break; 123 case FORMAT_A16B16G16R16: 124 ((unsigned short*)element)[0] = unorm<16>(color.r); 125 ((unsigned short*)element)[1] = unorm<16>(color.g); 126 ((unsigned short*)element)[2] = unorm<16>(color.b); 127 ((unsigned short*)element)[3] = unorm<16>(color.a); 128 break; 129 case FORMAT_V8U8: 130 *(unsigned short*)element = (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 131 break; 132 case FORMAT_L6V5U5: 133 *(unsigned short*)element = (unorm<6>(color.b) << 10) | (snorm<5>(color.g) << 5) | (snorm<5>(color.r) << 0); 134 break; 135 case FORMAT_Q8W8V8U8: 136 *(unsigned int*)element = (snorm<8>(color.a) << 24) | (snorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 137 break; 138 case FORMAT_X8L8V8U8: 139 *(unsigned int*)element = 0xFF000000 | (unorm<8>(color.b) << 16) | (snorm<8>(color.g) << 8) | (snorm<8>(color.r) << 0); 140 break; 141 case FORMAT_V16U16: 142 *(unsigned int*)element = (snorm<16>(color.g) << 16) | (snorm<16>(color.r) << 0); 143 break; 144 case FORMAT_A2W10V10U10: 145 *(unsigned int*)element = (unorm<2>(color.a) << 30) | (snorm<10>(color.b) << 20) | (snorm<10>(color.g) << 10) | (snorm<10>(color.r) << 0); 146 break; 147 case FORMAT_A16W16V16U16: 148 ((unsigned short*)element)[0] = 
snorm<16>(color.r); 149 ((unsigned short*)element)[1] = snorm<16>(color.g); 150 ((unsigned short*)element)[2] = snorm<16>(color.b); 151 ((unsigned short*)element)[3] = unorm<16>(color.a); 152 break; 153 case FORMAT_Q16W16V16U16: 154 ((unsigned short*)element)[0] = snorm<16>(color.r); 155 ((unsigned short*)element)[1] = snorm<16>(color.g); 156 ((unsigned short*)element)[2] = snorm<16>(color.b); 157 ((unsigned short*)element)[3] = snorm<16>(color.a); 158 break; 159 case FORMAT_R8G8B8: 160 ((unsigned char*)element)[0] = unorm<8>(color.b); 161 ((unsigned char*)element)[1] = unorm<8>(color.g); 162 ((unsigned char*)element)[2] = unorm<8>(color.r); 163 break; 164 case FORMAT_B8G8R8: 165 ((unsigned char*)element)[0] = unorm<8>(color.r); 166 ((unsigned char*)element)[1] = unorm<8>(color.g); 167 ((unsigned char*)element)[2] = unorm<8>(color.b); 168 break; 169 case FORMAT_R16F: 170 *(half*)element = (half)color.r; 171 break; 172 case FORMAT_A16F: 173 *(half*)element = (half)color.a; 174 break; 175 case FORMAT_G16R16F: 176 ((half*)element)[0] = (half)color.r; 177 ((half*)element)[1] = (half)color.g; 178 break; 179 case FORMAT_B16G16R16F: 180 ((half*)element)[0] = (half)color.r; 181 ((half*)element)[1] = (half)color.g; 182 ((half*)element)[2] = (half)color.b; 183 break; 184 case FORMAT_A16B16G16R16F: 185 ((half*)element)[0] = (half)color.r; 186 ((half*)element)[1] = (half)color.g; 187 ((half*)element)[2] = (half)color.b; 188 ((half*)element)[3] = (half)color.a; 189 break; 190 case FORMAT_A32F: 191 *(float*)element = color.a; 192 break; 193 case FORMAT_R32F: 194 *(float*)element = color.r; 195 break; 196 case FORMAT_G32R32F: 197 ((float*)element)[0] = color.r; 198 ((float*)element)[1] = color.g; 199 break; 200 case FORMAT_B32G32R32F: 201 ((float*)element)[0] = color.r; 202 ((float*)element)[1] = color.g; 203 ((float*)element)[2] = color.b; 204 break; 205 case FORMAT_A32B32G32R32F: 206 ((float*)element)[0] = color.r; 207 ((float*)element)[1] = color.g; 208 ((float*)element)[2] = 
color.b; 209 ((float*)element)[3] = color.a; 210 break; 211 case FORMAT_D32F: 212 case FORMAT_D32F_LOCKABLE: 213 case FORMAT_D32FS8_TEXTURE: 214 case FORMAT_D32FS8_SHADOW: 215 *((float*)element) = color.r; 216 break; 217 case FORMAT_D32F_COMPLEMENTARY: 218 *((float*)element) = 1 - color.r; 219 break; 220 case FORMAT_S8: 221 *((unsigned char*)element) = unorm<8>(color.r); 222 break; 223 case FORMAT_L8: 224 *(unsigned char*)element = unorm<8>(color.r); 225 break; 226 case FORMAT_A4L4: 227 *(unsigned char*)element = (unorm<4>(color.a) << 4) | (unorm<4>(color.r) << 0); 228 break; 229 case FORMAT_L16: 230 *(unsigned short*)element = unorm<16>(color.r); 231 break; 232 case FORMAT_A8L8: 233 *(unsigned short*)element = (unorm<8>(color.a) << 8) | (unorm<8>(color.r) << 0); 234 break; 235 case FORMAT_L16F: 236 *(half*)element = (half)color.r; 237 break; 238 case FORMAT_A16L16F: 239 ((half*)element)[0] = (half)color.r; 240 ((half*)element)[1] = (half)color.a; 241 break; 242 case FORMAT_L32F: 243 *(float*)element = color.r; 244 break; 245 case FORMAT_A32L32F: 246 ((float*)element)[0] = color.r; 247 ((float*)element)[1] = color.a; 248 break; 249 default: 250 ASSERT(false); 251 } 252 } 253 254 Color<float> Surface::Buffer::read(int x, int y, int z) const 255 { 256 void *element = (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 257 258 return read(element); 259 } 260 261 Color<float> Surface::Buffer::read(int x, int y) const 262 { 263 void *element = (unsigned char*)buffer + x * bytes + y * pitchB; 264 265 return read(element); 266 } 267 268 inline Color<float> Surface::Buffer::read(void *element) const 269 { 270 float r = 0.0f; 271 float g = 0.0f; 272 float b = 0.0f; 273 float a = 1.0f; 274 275 switch(format) 276 { 277 case FORMAT_P8: 278 { 279 ASSERT(palette); 280 281 unsigned int abgr = palette[*(unsigned char*)element]; 282 283 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 284 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 285 b = (abgr & 0x00FF0000) * (1.0f / 
0x00FF0000); 286 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 287 } 288 break; 289 case FORMAT_A8P8: 290 { 291 ASSERT(palette); 292 293 unsigned int bgr = palette[((unsigned char*)element)[0]]; 294 295 r = (bgr & 0x000000FF) * (1.0f / 0x000000FF); 296 g = (bgr & 0x0000FF00) * (1.0f / 0x0000FF00); 297 b = (bgr & 0x00FF0000) * (1.0f / 0x00FF0000); 298 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 299 } 300 break; 301 case FORMAT_A8: 302 r = 0; 303 g = 0; 304 b = 0; 305 a = *(unsigned char*)element * (1.0f / 0xFF); 306 break; 307 case FORMAT_R8: 308 r = *(unsigned char*)element * (1.0f / 0xFF); 309 break; 310 case FORMAT_R3G3B2: 311 { 312 unsigned char rgb = *(unsigned char*)element; 313 314 r = (rgb & 0xE0) * (1.0f / 0xE0); 315 g = (rgb & 0x1C) * (1.0f / 0x1C); 316 b = (rgb & 0x03) * (1.0f / 0x03); 317 } 318 break; 319 case FORMAT_A8R3G3B2: 320 { 321 unsigned short argb = *(unsigned short*)element; 322 323 a = (argb & 0xFF00) * (1.0f / 0xFF00); 324 r = (argb & 0x00E0) * (1.0f / 0x00E0); 325 g = (argb & 0x001C) * (1.0f / 0x001C); 326 b = (argb & 0x0003) * (1.0f / 0x0003); 327 } 328 break; 329 case FORMAT_X4R4G4B4: 330 { 331 unsigned short rgb = *(unsigned short*)element; 332 333 r = (rgb & 0x0F00) * (1.0f / 0x0F00); 334 g = (rgb & 0x00F0) * (1.0f / 0x00F0); 335 b = (rgb & 0x000F) * (1.0f / 0x000F); 336 } 337 break; 338 case FORMAT_A4R4G4B4: 339 { 340 unsigned short argb = *(unsigned short*)element; 341 342 a = (argb & 0xF000) * (1.0f / 0xF000); 343 r = (argb & 0x0F00) * (1.0f / 0x0F00); 344 g = (argb & 0x00F0) * (1.0f / 0x00F0); 345 b = (argb & 0x000F) * (1.0f / 0x000F); 346 } 347 break; 348 case FORMAT_R4G4B4A4: 349 { 350 unsigned short rgba = *(unsigned short*)element; 351 352 r = (rgba & 0xF000) * (1.0f / 0xF000); 353 g = (rgba & 0x0F00) * (1.0f / 0x0F00); 354 b = (rgba & 0x00F0) * (1.0f / 0x00F0); 355 a = (rgba & 0x000F) * (1.0f / 0x000F); 356 } 357 break; 358 case FORMAT_R5G6B5: 359 { 360 unsigned short rgb = *(unsigned short*)element; 361 362 r = (rgb & 
0xF800) * (1.0f / 0xF800); 363 g = (rgb & 0x07E0) * (1.0f / 0x07E0); 364 b = (rgb & 0x001F) * (1.0f / 0x001F); 365 } 366 break; 367 case FORMAT_A1R5G5B5: 368 { 369 unsigned short argb = *(unsigned short*)element; 370 371 a = (argb & 0x8000) * (1.0f / 0x8000); 372 r = (argb & 0x7C00) * (1.0f / 0x7C00); 373 g = (argb & 0x03E0) * (1.0f / 0x03E0); 374 b = (argb & 0x001F) * (1.0f / 0x001F); 375 } 376 break; 377 case FORMAT_R5G5B5A1: 378 { 379 unsigned short rgba = *(unsigned short*)element; 380 381 r = (rgba & 0xF800) * (1.0f / 0xF800); 382 g = (rgba & 0x07C0) * (1.0f / 0x07C0); 383 b = (rgba & 0x003E) * (1.0f / 0x003E); 384 a = (rgba & 0x0001) * (1.0f / 0x0001); 385 } 386 break; 387 case FORMAT_X1R5G5B5: 388 { 389 unsigned short xrgb = *(unsigned short*)element; 390 391 r = (xrgb & 0x7C00) * (1.0f / 0x7C00); 392 g = (xrgb & 0x03E0) * (1.0f / 0x03E0); 393 b = (xrgb & 0x001F) * (1.0f / 0x001F); 394 } 395 break; 396 case FORMAT_A8R8G8B8: 397 { 398 unsigned int argb = *(unsigned int*)element; 399 400 a = (argb & 0xFF000000) * (1.0f / 0xFF000000); 401 r = (argb & 0x00FF0000) * (1.0f / 0x00FF0000); 402 g = (argb & 0x0000FF00) * (1.0f / 0x0000FF00); 403 b = (argb & 0x000000FF) * (1.0f / 0x000000FF); 404 } 405 break; 406 case FORMAT_X8R8G8B8: 407 { 408 unsigned int xrgb = *(unsigned int*)element; 409 410 r = (xrgb & 0x00FF0000) * (1.0f / 0x00FF0000); 411 g = (xrgb & 0x0000FF00) * (1.0f / 0x0000FF00); 412 b = (xrgb & 0x000000FF) * (1.0f / 0x000000FF); 413 } 414 break; 415 case FORMAT_A8B8G8R8: 416 { 417 unsigned int abgr = *(unsigned int*)element; 418 419 a = (abgr & 0xFF000000) * (1.0f / 0xFF000000); 420 b = (abgr & 0x00FF0000) * (1.0f / 0x00FF0000); 421 g = (abgr & 0x0000FF00) * (1.0f / 0x0000FF00); 422 r = (abgr & 0x000000FF) * (1.0f / 0x000000FF); 423 } 424 break; 425 case FORMAT_X8B8G8R8: 426 { 427 unsigned int xbgr = *(unsigned int*)element; 428 429 b = (xbgr & 0x00FF0000) * (1.0f / 0x00FF0000); 430 g = (xbgr & 0x0000FF00) * (1.0f / 0x0000FF00); 431 r = (xbgr & 
0x000000FF) * (1.0f / 0x000000FF); 432 } 433 break; 434 case FORMAT_G8R8: 435 { 436 unsigned short gr = *(unsigned short*)element; 437 438 g = (gr & 0xFF00) * (1.0f / 0xFF00); 439 r = (gr & 0x00FF) * (1.0f / 0x00FF); 440 } 441 break; 442 case FORMAT_G16R16: 443 { 444 unsigned int gr = *(unsigned int*)element; 445 446 g = (gr & 0xFFFF0000) * (1.0f / 0xFFFF0000); 447 r = (gr & 0x0000FFFF) * (1.0f / 0x0000FFFF); 448 } 449 break; 450 case FORMAT_A2R10G10B10: 451 { 452 unsigned int argb = *(unsigned int*)element; 453 454 a = (argb & 0xC0000000) * (1.0f / 0xC0000000); 455 r = (argb & 0x3FF00000) * (1.0f / 0x3FF00000); 456 g = (argb & 0x000FFC00) * (1.0f / 0x000FFC00); 457 b = (argb & 0x000003FF) * (1.0f / 0x000003FF); 458 } 459 break; 460 case FORMAT_A2B10G10R10: 461 { 462 unsigned int abgr = *(unsigned int*)element; 463 464 a = (abgr & 0xC0000000) * (1.0f / 0xC0000000); 465 b = (abgr & 0x3FF00000) * (1.0f / 0x3FF00000); 466 g = (abgr & 0x000FFC00) * (1.0f / 0x000FFC00); 467 r = (abgr & 0x000003FF) * (1.0f / 0x000003FF); 468 } 469 break; 470 case FORMAT_A16B16G16R16: 471 r = ((unsigned short*)element)[0] * (1.0f / 0xFFFF); 472 g = ((unsigned short*)element)[1] * (1.0f / 0xFFFF); 473 b = ((unsigned short*)element)[2] * (1.0f / 0xFFFF); 474 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 475 break; 476 case FORMAT_V8U8: 477 { 478 unsigned short vu = *(unsigned short*)element; 479 480 r = ((int)(vu & 0x00FF) << 24) * (1.0f / 0x7F000000); 481 g = ((int)(vu & 0xFF00) << 16) * (1.0f / 0x7F000000); 482 } 483 break; 484 case FORMAT_L6V5U5: 485 { 486 unsigned short lvu = *(unsigned short*)element; 487 488 r = ((int)(lvu & 0x001F) << 27) * (1.0f / 0x78000000); 489 g = ((int)(lvu & 0x03E0) << 22) * (1.0f / 0x78000000); 490 b = (lvu & 0xFC00) * (1.0f / 0xFC00); 491 } 492 break; 493 case FORMAT_Q8W8V8U8: 494 { 495 unsigned int qwvu = *(unsigned int*)element; 496 497 r = ((int)(qwvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 498 g = ((int)(qwvu & 0x0000FF00) << 16) * (1.0f / 
0x7F000000); 499 b = ((int)(qwvu & 0x00FF0000) << 8) * (1.0f / 0x7F000000); 500 a = ((int)(qwvu & 0xFF000000) << 0) * (1.0f / 0x7F000000); 501 } 502 break; 503 case FORMAT_X8L8V8U8: 504 { 505 unsigned int xlvu = *(unsigned int*)element; 506 507 r = ((int)(xlvu & 0x000000FF) << 24) * (1.0f / 0x7F000000); 508 g = ((int)(xlvu & 0x0000FF00) << 16) * (1.0f / 0x7F000000); 509 b = (xlvu & 0x00FF0000) * (1.0f / 0x00FF0000); 510 } 511 break; 512 case FORMAT_R8G8B8: 513 r = ((unsigned char*)element)[2] * (1.0f / 0xFF); 514 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 515 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 516 break; 517 case FORMAT_B8G8R8: 518 r = ((unsigned char*)element)[0] * (1.0f / 0xFF); 519 g = ((unsigned char*)element)[1] * (1.0f / 0xFF); 520 b = ((unsigned char*)element)[2] * (1.0f / 0xFF); 521 break; 522 case FORMAT_V16U16: 523 { 524 unsigned int vu = *(unsigned int*)element; 525 526 r = ((int)(vu & 0x0000FFFF) << 16) * (1.0f / 0x7FFF0000); 527 g = ((int)(vu & 0xFFFF0000) << 0) * (1.0f / 0x7FFF0000); 528 } 529 break; 530 case FORMAT_A2W10V10U10: 531 { 532 unsigned int awvu = *(unsigned int*)element; 533 534 r = ((int)(awvu & 0x000003FF) << 22) * (1.0f / 0x7FC00000); 535 g = ((int)(awvu & 0x000FFC00) << 12) * (1.0f / 0x7FC00000); 536 b = ((int)(awvu & 0x3FF00000) << 2) * (1.0f / 0x7FC00000); 537 a = (awvu & 0xC0000000) * (1.0f / 0xC0000000); 538 } 539 break; 540 case FORMAT_A16W16V16U16: 541 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 542 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 543 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 544 a = ((unsigned short*)element)[3] * (1.0f / 0xFFFF); 545 break; 546 case FORMAT_Q16W16V16U16: 547 r = ((signed short*)element)[0] * (1.0f / 0x7FFF); 548 g = ((signed short*)element)[1] * (1.0f / 0x7FFF); 549 b = ((signed short*)element)[2] * (1.0f / 0x7FFF); 550 a = ((signed short*)element)[3] * (1.0f / 0x7FFF); 551 break; 552 case FORMAT_L8: 553 r = 554 g = 555 b = *(unsigned char*)element * (1.0f 
/ 0xFF); 556 break; 557 case FORMAT_A4L4: 558 { 559 unsigned char al = *(unsigned char*)element; 560 561 r = 562 g = 563 b = (al & 0x0F) * (1.0f / 0x0F); 564 a = (al & 0xF0) * (1.0f / 0xF0); 565 } 566 break; 567 case FORMAT_L16: 568 r = 569 g = 570 b = *(unsigned short*)element * (1.0f / 0xFFFF); 571 break; 572 case FORMAT_A8L8: 573 r = 574 g = 575 b = ((unsigned char*)element)[0] * (1.0f / 0xFF); 576 a = ((unsigned char*)element)[1] * (1.0f / 0xFF); 577 break; 578 case FORMAT_L16F: 579 r = 580 g = 581 b = *(half*)element; 582 break; 583 case FORMAT_A16L16F: 584 r = 585 g = 586 b = ((half*)element)[0]; 587 a = ((half*)element)[1]; 588 break; 589 case FORMAT_L32F: 590 r = 591 g = 592 b = *(float*)element; 593 break; 594 case FORMAT_A32L32F: 595 r = 596 g = 597 b = ((float*)element)[0]; 598 a = ((float*)element)[1]; 599 break; 600 case FORMAT_A16F: 601 a = *(half*)element; 602 break; 603 case FORMAT_R16F: 604 r = *(half*)element; 605 break; 606 case FORMAT_G16R16F: 607 r = ((half*)element)[0]; 608 g = ((half*)element)[1]; 609 break; 610 case FORMAT_B16G16R16F: 611 r = ((half*)element)[0]; 612 g = ((half*)element)[1]; 613 b = ((half*)element)[2]; 614 break; 615 case FORMAT_A16B16G16R16F: 616 r = ((half*)element)[0]; 617 g = ((half*)element)[1]; 618 b = ((half*)element)[2]; 619 a = ((half*)element)[3]; 620 break; 621 case FORMAT_A32F: 622 a = *(float*)element; 623 break; 624 case FORMAT_R32F: 625 r = *(float*)element; 626 break; 627 case FORMAT_G32R32F: 628 r = ((float*)element)[0]; 629 g = ((float*)element)[1]; 630 break; 631 case FORMAT_B32G32R32F: 632 r = ((float*)element)[0]; 633 g = ((float*)element)[1]; 634 b = ((float*)element)[2]; 635 break; 636 case FORMAT_A32B32G32R32F: 637 r = ((float*)element)[0]; 638 g = ((float*)element)[1]; 639 b = ((float*)element)[2]; 640 a = ((float*)element)[3]; 641 break; 642 case FORMAT_D32F: 643 case FORMAT_D32F_LOCKABLE: 644 case FORMAT_D32FS8_TEXTURE: 645 case FORMAT_D32FS8_SHADOW: 646 r = *(float*)element; 647 g = r; 648 b = r; 
649 a = r; 650 break; 651 case FORMAT_D32F_COMPLEMENTARY: 652 r = 1.0f - *(float*)element; 653 g = r; 654 b = r; 655 a = r; 656 break; 657 case FORMAT_S8: 658 r = *(unsigned char*)element * (1.0f / 0xFF); 659 break; 660 default: 661 ASSERT(false); 662 } 663 664 // if(sRGB) 665 // { 666 // r = sRGBtoLinear(r); 667 // g = sRGBtoLinear(g); 668 // b = sRGBtoLinear(b); 669 // } 670 671 return Color<float>(r, g, b, a); 672 } 673 674 Color<float> Surface::Buffer::sample(float x, float y, float z) const 675 { 676 x -= 0.5f; 677 y -= 0.5f; 678 z -= 0.5f; 679 680 int x0 = clamp((int)x, 0, width - 1); 681 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 682 683 int y0 = clamp((int)y, 0, height - 1); 684 int y1 = (y0 + 1 >= height) ? y0 : y0 + 1; 685 686 int z0 = clamp((int)z, 0, depth - 1); 687 int z1 = (z0 + 1 >= depth) ? z0 : z0 + 1; 688 689 Color<float> c000 = read(x0, y0, z0); 690 Color<float> c100 = read(x1, y0, z0); 691 Color<float> c010 = read(x0, y1, z0); 692 Color<float> c110 = read(x1, y1, z0); 693 Color<float> c001 = read(x0, y0, z1); 694 Color<float> c101 = read(x1, y0, z1); 695 Color<float> c011 = read(x0, y1, z1); 696 Color<float> c111 = read(x1, y1, z1); 697 698 float fx = x - x0; 699 float fy = y - y0; 700 float fz = z - z0; 701 702 c000 *= (1 - fx) * (1 - fy) * (1 - fz); 703 c100 *= fx * (1 - fy) * (1 - fz); 704 c010 *= (1 - fx) * fy * (1 - fz); 705 c110 *= fx * fy * (1 - fz); 706 c001 *= (1 - fx) * (1 - fy) * fz; 707 c101 *= fx * (1 - fy) * fz; 708 c011 *= (1 - fx) * fy * fz; 709 c111 *= fx * fy * fz; 710 711 return c000 + c100 + c010 + c110 + c001 + c101 + c011 + c111; 712 } 713 714 Color<float> Surface::Buffer::sample(float x, float y) const 715 { 716 x -= 0.5f; 717 y -= 0.5f; 718 719 int x0 = clamp((int)x, 0, width - 1); 720 int x1 = (x0 + 1 >= width) ? x0 : x0 + 1; 721 722 int y0 = clamp((int)y, 0, height - 1); 723 int y1 = (y0 + 1 >= height) ? 
y0 : y0 + 1; 724 725 Color<float> c00 = read(x0, y0); 726 Color<float> c10 = read(x1, y0); 727 Color<float> c01 = read(x0, y1); 728 Color<float> c11 = read(x1, y1); 729 730 float fx = x - x0; 731 float fy = y - y0; 732 733 c00 *= (1 - fx) * (1 - fy); 734 c10 *= fx * (1 - fy); 735 c01 *= (1 - fx) * fy; 736 c11 *= fx * fy; 737 738 return c00 + c10 + c01 + c11; 739 } 740 741 void *Surface::Buffer::lockRect(int x, int y, int z, Lock lock) 742 { 743 this->lock = lock; 744 745 switch(lock) 746 { 747 case LOCK_UNLOCKED: 748 case LOCK_READONLY: 749 break; 750 case LOCK_WRITEONLY: 751 case LOCK_READWRITE: 752 case LOCK_DISCARD: 753 dirty = true; 754 break; 755 default: 756 ASSERT(false); 757 } 758 759 if(buffer) 760 { 761 switch(format) 762 { 763 #if S3TC_SUPPORT 764 case FORMAT_DXT1: 765 #endif 766 case FORMAT_ATI1: 767 case FORMAT_ETC1: 768 case FORMAT_R11_EAC: 769 case FORMAT_SIGNED_R11_EAC: 770 case FORMAT_RGB8_ETC2: 771 case FORMAT_SRGB8_ETC2: 772 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 773 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 774 return (unsigned char*)buffer + 8 * (x / 4) + (y / 4) * pitchB + z * sliceB; 775 case FORMAT_RG11_EAC: 776 case FORMAT_SIGNED_RG11_EAC: 777 case FORMAT_RGBA8_ETC2_EAC: 778 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 779 case FORMAT_RGBA_ASTC_4x4_KHR: 780 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 781 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 782 case FORMAT_RGBA_ASTC_5x4_KHR: 783 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 784 return (unsigned char*)buffer + 16 * (x / 5) + (y / 4) * pitchB + z * sliceB; 785 case FORMAT_RGBA_ASTC_5x5_KHR: 786 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 787 return (unsigned char*)buffer + 16 * (x / 5) + (y / 5) * pitchB + z * sliceB; 788 case FORMAT_RGBA_ASTC_6x5_KHR: 789 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 790 return (unsigned char*)buffer + 16 * (x / 6) + (y / 5) * pitchB + z * sliceB; 791 case FORMAT_RGBA_ASTC_6x6_KHR: 792 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 793 return (unsigned 
char*)buffer + 16 * (x / 6) + (y / 6) * pitchB + z * sliceB; 794 case FORMAT_RGBA_ASTC_8x5_KHR: 795 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 796 return (unsigned char*)buffer + 16 * (x / 8) + (y / 5) * pitchB + z * sliceB; 797 case FORMAT_RGBA_ASTC_8x6_KHR: 798 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 799 return (unsigned char*)buffer + 16 * (x / 8) + (y / 6) * pitchB + z * sliceB; 800 case FORMAT_RGBA_ASTC_8x8_KHR: 801 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 802 return (unsigned char*)buffer + 16 * (x / 8) + (y / 8) * pitchB + z * sliceB; 803 case FORMAT_RGBA_ASTC_10x5_KHR: 804 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 805 return (unsigned char*)buffer + 16 * (x / 10) + (y / 5) * pitchB + z * sliceB; 806 case FORMAT_RGBA_ASTC_10x6_KHR: 807 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 808 return (unsigned char*)buffer + 16 * (x / 10) + (y / 6) * pitchB + z * sliceB; 809 case FORMAT_RGBA_ASTC_10x8_KHR: 810 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 811 return (unsigned char*)buffer + 16 * (x / 10) + (y / 8) * pitchB + z * sliceB; 812 case FORMAT_RGBA_ASTC_10x10_KHR: 813 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 814 return (unsigned char*)buffer + 16 * (x / 10) + (y / 10) * pitchB + z * sliceB; 815 case FORMAT_RGBA_ASTC_12x10_KHR: 816 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 817 return (unsigned char*)buffer + 16 * (x / 12) + (y / 10) * pitchB + z * sliceB; 818 case FORMAT_RGBA_ASTC_12x12_KHR: 819 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 820 return (unsigned char*)buffer + 16 * (x / 12) + (y / 12) * pitchB + z * sliceB; 821 #if S3TC_SUPPORT 822 case FORMAT_DXT3: 823 case FORMAT_DXT5: 824 #endif 825 case FORMAT_ATI2: 826 return (unsigned char*)buffer + 16 * (x / 4) + (y / 4) * pitchB + z * sliceB; 827 default: 828 return (unsigned char*)buffer + x * bytes + y * pitchB + z * sliceB; 829 } 830 } 831 832 return 0; 833 } 834 835 void Surface::Buffer::unlockRect() 836 { 837 lock = LOCK_UNLOCKED; 838 } 839 840 Surface::Surface(int width, int height, int depth, Format format, void *pixels, 
int pitch, int slice) : lockable(true), renderTarget(false) 841 { 842 resource = new Resource(0); 843 hasParent = false; 844 ownExternal = false; 845 depth = max(1, depth); 846 847 external.buffer = pixels; 848 external.width = width; 849 external.height = height; 850 external.depth = depth; 851 external.format = format; 852 external.bytes = bytes(external.format); 853 external.pitchB = pitch; 854 external.pitchP = pitch / external.bytes; 855 external.sliceB = slice; 856 external.sliceP = slice / external.bytes; 857 external.lock = LOCK_UNLOCKED; 858 external.dirty = true; 859 860 internal.buffer = 0; 861 internal.width = width; 862 internal.height = height; 863 internal.depth = depth; 864 internal.format = selectInternalFormat(format); 865 internal.bytes = bytes(internal.format); 866 internal.pitchB = pitchB(internal.width, internal.format, false); 867 internal.pitchP = pitchP(internal.width, internal.format, false); 868 internal.sliceB = sliceB(internal.width, internal.height, internal.format, false); 869 internal.sliceP = sliceP(internal.width, internal.height, internal.format, false); 870 internal.lock = LOCK_UNLOCKED; 871 internal.dirty = false; 872 873 stencil.buffer = 0; 874 stencil.width = width; 875 stencil.height = height; 876 stencil.depth = depth; 877 stencil.format = FORMAT_S8; 878 stencil.bytes = bytes(stencil.format); 879 stencil.pitchB = pitchB(stencil.width, stencil.format, false); 880 stencil.pitchP = pitchP(stencil.width, stencil.format, false); 881 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, false); 882 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, false); 883 stencil.lock = LOCK_UNLOCKED; 884 stencil.dirty = false; 885 886 dirtyMipmaps = true; 887 paletteUsed = 0; 888 } 889 890 Surface::Surface(Resource *texture, int width, int height, int depth, Format format, bool lockable, bool renderTarget) : lockable(lockable), renderTarget(renderTarget) 891 { 892 resource = texture ? 
texture : new Resource(0); 893 hasParent = texture != 0; 894 ownExternal = true; 895 depth = max(1, depth); 896 897 external.buffer = 0; 898 external.width = width; 899 external.height = height; 900 external.depth = depth; 901 external.format = format; 902 external.bytes = bytes(external.format); 903 external.pitchB = pitchB(external.width, external.format, renderTarget && !texture); 904 external.pitchP = pitchP(external.width, external.format, renderTarget && !texture); 905 external.sliceB = sliceB(external.width, external.height, external.format, renderTarget && !texture); 906 external.sliceP = sliceP(external.width, external.height, external.format, renderTarget && !texture); 907 external.lock = LOCK_UNLOCKED; 908 external.dirty = false; 909 910 internal.buffer = 0; 911 internal.width = width; 912 internal.height = height; 913 internal.depth = depth; 914 internal.format = selectInternalFormat(format); 915 internal.bytes = bytes(internal.format); 916 internal.pitchB = pitchB(internal.width, internal.format, renderTarget); 917 internal.pitchP = pitchP(internal.width, internal.format, renderTarget); 918 internal.sliceB = sliceB(internal.width, internal.height, internal.format, renderTarget); 919 internal.sliceP = sliceP(internal.width, internal.height, internal.format, renderTarget); 920 internal.lock = LOCK_UNLOCKED; 921 internal.dirty = false; 922 923 stencil.buffer = 0; 924 stencil.width = width; 925 stencil.height = height; 926 stencil.depth = depth; 927 stencil.format = FORMAT_S8; 928 stencil.bytes = bytes(stencil.format); 929 stencil.pitchB = pitchB(stencil.width, stencil.format, renderTarget); 930 stencil.pitchP = pitchP(stencil.width, stencil.format, renderTarget); 931 stencil.sliceB = sliceB(stencil.width, stencil.height, stencil.format, renderTarget); 932 stencil.sliceP = sliceP(stencil.width, stencil.height, stencil.format, renderTarget); 933 stencil.lock = LOCK_UNLOCKED; 934 stencil.dirty = false; 935 936 dirtyMipmaps = true; 937 paletteUsed = 0; 938 } 
	// Destructor: waits for all other accessors to release the resource, then
	// frees any buffers this surface owns.
	Surface::~Surface()
	{
		// Synchronize so we can deallocate the buffers below
		resource->lock(DESTRUCT);
		resource->unlock();

		if(!hasParent)
		{
			resource->destruct();
		}

		if(ownExternal)
		{
			deallocate(external.buffer);
		}

		// The internal buffer may alias the external one (identical formats);
		// only free it when it is a distinct allocation.
		if(internal.buffer != external.buffer)
		{
			deallocate(internal.buffer);
		}

		deallocate(stencil.buffer);

		external.buffer = 0;
		internal.buffer = 0;
		stencil.buffer = 0;
	}

	// Lock the external (client-format) buffer, allocating it on first use and
	// copying from the internal buffer if that side holds newer data.
	void *Surface::lockExternal(int x, int y, int z, Lock lock, Accessor client)
	{
		resource->lock(client);

		if(!external.buffer)
		{
			// Share storage with the internal buffer when the formats match;
			// otherwise allocate a separate external copy.
			if(internal.buffer && identicalFormats())
			{
				external.buffer = internal.buffer;
			}
			else
			{
				external.buffer = allocateBuffer(external.width, external.height, external.depth, external.format);
			}
		}

		if(internal.dirty)
		{
			// Propagate internal changes out, unless the caller discards contents.
			if(lock != LOCK_DISCARD)
			{
				update(external, internal);
			}

			internal.dirty = false;
		}

		switch(lock)
		{
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyMipmaps = true;
			break;
		default:
			ASSERT(false);
		}

		return external.lockRect(x, y, z, lock);
	}

	void Surface::unlockExternal()
	{
		resource->unlock();

		external.unlockRect();
	}

	// Lock the internal (renderer-format) buffer, allocating it on first use and
	// converting from the external buffer if the client wrote newer data (or the
	// shared palette changed for paletted formats).
	void *Surface::lockInternal(int x, int y, int z, Lock lock, Accessor client)
	{
		// LOCK_UNLOCKED is used as a probe that must not take the resource lock.
		if(lock != LOCK_UNLOCKED)
		{
			resource->lock(client);
		}

		if(!internal.buffer)
		{
			if(external.buffer && identicalFormats())
			{
				internal.buffer = external.buffer;
			}
			else
			{
				internal.buffer = allocateBuffer(internal.width, internal.height, internal.depth, internal.format);
			}
		}

		// FIXME: WHQL requires conversion to lower external precision and back
		if(logPrecision >= WHQL)
		{
			if(internal.dirty && renderTarget && internal.format != external.format)
			{
				if(lock != LOCK_DISCARD)
				{
					switch(external.format)
					{
					// Round-trip through the external format to quantize to its
					// lower precision for these formats only.
					case FORMAT_R3G3B2:
					case FORMAT_A8R3G3B2:
					case FORMAT_A1R5G5B5:
					case FORMAT_A2R10G10B10:
					case FORMAT_A2B10G10R10:
						lockExternal(0, 0, 0, LOCK_READWRITE, client);
						unlockExternal();
						break;
					default:
						// Difference passes WHQL
						break;
					}
				}
			}
		}

		if(external.dirty || (isPalette(external.format) && paletteUsed != Surface::paletteID))
		{
			if(lock != LOCK_DISCARD)
			{
				update(internal, external);
			}

			external.dirty = false;
			paletteUsed = Surface::paletteID;
		}

		switch(lock)
		{
		case LOCK_UNLOCKED:
		case LOCK_READONLY:
			break;
		case LOCK_WRITEONLY:
		case LOCK_READWRITE:
		case LOCK_DISCARD:
			dirtyMipmaps = true;
			break;
		default:
			ASSERT(false);
		}

		// Public read locks see the resolved (multisample-collapsed) image.
		if(lock == LOCK_READONLY && client == PUBLIC)
		{
			resolve();
		}

		return internal.lockRect(x, y, z, lock);
	}

	void Surface::unlockInternal()
	{
		resource->unlock();

		internal.unlockRect();
	}

	// Lock the separate S8 stencil plane, allocating it on first use.
	void *Surface::lockStencil(int front, Accessor client)
	{
		resource->lock(client);

		if(!stencil.buffer)
		{
			stencil.buffer = allocateBuffer(stencil.width, stencil.height, stencil.depth, stencil.format);
		}

		return stencil.lockRect(0, 0, front, LOCK_READWRITE);   // FIXME
	}

	void Surface::unlockStencil()
	{
		resource->unlock();

		stencil.unlockRect();
	}

	// Size of one addressing unit for the given format, in bytes. For block
	// compressed formats this is the byte stride of one texel column within a
	// block (see the "Column of four pixels" notes), not a whole block.
	int Surface::bytes(Format format)
	{
		switch(format)
		{
		case FORMAT_NULL:				return 0;
		case FORMAT_P8:					return 1;
		case FORMAT_A8P8:				return 2;
		case FORMAT_A8:					return 1;
		case FORMAT_R8:					return 1;
		case FORMAT_R3G3B2:				return 1;
		case FORMAT_A8R3G3B2:			return 2;
		case FORMAT_R5G6B5:				return 2;
		case FORMAT_A1R5G5B5:			return 2;
		case FORMAT_X1R5G5B5:			return 2;
		case FORMAT_R5G5B5A1:			return 2;
		case FORMAT_X4R4G4B4:			return 2;
		case FORMAT_A4R4G4B4:			return 2;
		case FORMAT_R4G4B4A4:			return 2;
		case FORMAT_R8G8B8:				return 3;
		case FORMAT_B8G8R8:				return 3;
		case FORMAT_X8R8G8B8:			return 4;
	//	case FORMAT_X8G8R8B8Q:			return 4;
		case FORMAT_A8R8G8B8:			return 4;
	//	case FORMAT_A8G8R8B8Q:			return 4;
		case FORMAT_X8B8G8R8:			return 4;
		case FORMAT_A8B8G8R8:			return 4;
		case FORMAT_A2R10G10B10:		return 4;
		case FORMAT_A2B10G10R10:		return 4;
		case FORMAT_G8R8:				return 2;
		case FORMAT_G16R16:				return 4;
		case FORMAT_A16B16G16R16:		return 8;
		// Compressed formats
	#if S3TC_SUPPORT
		case FORMAT_DXT1:				return 2;   // Column of four pixels
		case FORMAT_DXT3:				return 4;   // Column of four pixels
		case FORMAT_DXT5:				return 4;   // Column of four pixels
	#endif
		case FORMAT_ATI1:				return 2;   // Column of four pixels
		case FORMAT_ATI2:				return 4;   // Column of four pixels
		case FORMAT_ETC1:				return 2;   // Column of four pixels
		case FORMAT_R11_EAC:			return 2;
		case FORMAT_SIGNED_R11_EAC:		return 2;
		case FORMAT_RG11_EAC:			return 4;
		case FORMAT_SIGNED_RG11_EAC:	return 4;
		case FORMAT_RGB8_ETC2:			return 2;
		case FORMAT_SRGB8_ETC2:			return 2;
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: return 2;
		case FORMAT_RGBA8_ETC2_EAC:		return 4;
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC: return 4;
		case FORMAT_RGBA_ASTC_4x4_KHR:
		case FORMAT_RGBA_ASTC_5x4_KHR:
		case FORMAT_RGBA_ASTC_5x5_KHR:
		case FORMAT_RGBA_ASTC_6x5_KHR:
		case FORMAT_RGBA_ASTC_6x6_KHR:
		case FORMAT_RGBA_ASTC_8x5_KHR:
		case FORMAT_RGBA_ASTC_8x6_KHR:
		case FORMAT_RGBA_ASTC_8x8_KHR:
		case FORMAT_RGBA_ASTC_10x5_KHR:
		case FORMAT_RGBA_ASTC_10x6_KHR:
		case FORMAT_RGBA_ASTC_10x8_KHR:
		case FORMAT_RGBA_ASTC_10x10_KHR:
case FORMAT_RGBA_ASTC_12x10_KHR: 1183 case FORMAT_RGBA_ASTC_12x12_KHR: 1184 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1185 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1186 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1187 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1188 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1189 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1190 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1191 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 1192 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 1193 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 1194 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 1195 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 1196 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 1197 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: return 0; // FIXME 1198 // Bumpmap formats 1199 case FORMAT_V8U8: return 2; 1200 case FORMAT_L6V5U5: return 2; 1201 case FORMAT_Q8W8V8U8: return 4; 1202 case FORMAT_X8L8V8U8: return 4; 1203 case FORMAT_A2W10V10U10: return 4; 1204 case FORMAT_V16U16: return 4; 1205 case FORMAT_A16W16V16U16: return 8; 1206 case FORMAT_Q16W16V16U16: return 8; 1207 // Luminance formats 1208 case FORMAT_L8: return 1; 1209 case FORMAT_A4L4: return 1; 1210 case FORMAT_L16: return 2; 1211 case FORMAT_A8L8: return 2; 1212 case FORMAT_L16F: return 2; 1213 case FORMAT_A16L16F: return 4; 1214 case FORMAT_L32F: return 4; 1215 case FORMAT_A32L32F: return 8; 1216 // Floating-point formats 1217 case FORMAT_A16F: return 2; 1218 case FORMAT_R16F: return 2; 1219 case FORMAT_G16R16F: return 4; 1220 case FORMAT_B16G16R16F: return 6; 1221 case FORMAT_A16B16G16R16F: return 8; 1222 case FORMAT_A32F: return 4; 1223 case FORMAT_R32F: return 4; 1224 case FORMAT_G32R32F: return 8; 1225 case FORMAT_B32G32R32F: return 12; 1226 case FORMAT_A32B32G32R32F: return 16; 1227 // Depth/stencil formats 1228 case FORMAT_D16: return 2; 1229 case FORMAT_D32: return 4; 1230 case FORMAT_D24X8: return 4; 1231 case FORMAT_D24S8: return 4; 1232 case FORMAT_D24FS8: return 4; 1233 case FORMAT_D32F: return 4; 1234 case FORMAT_D32F_COMPLEMENTARY: 
return 4; 1235 case FORMAT_D32F_LOCKABLE: return 4; 1236 case FORMAT_D32FS8_TEXTURE: return 4; 1237 case FORMAT_D32FS8_SHADOW: return 4; 1238 case FORMAT_DF24S8: return 4; 1239 case FORMAT_DF16S8: return 2; 1240 case FORMAT_INTZ: return 4; 1241 case FORMAT_S8: return 1; 1242 case FORMAT_YV12_BT601: return 1; // Y plane only 1243 case FORMAT_YV12_BT709: return 1; // Y plane only 1244 case FORMAT_YV12_JFIF: return 1; // Y plane only 1245 default: 1246 ASSERT(false); 1247 } 1248 1249 return 0; 1250 } 1251 1252 int Surface::pitchB(int width, Format format, bool target) 1253 { 1254 if(target || isDepth(format) || isStencil(format)) 1255 { 1256 width = align(width, 2); 1257 } 1258 1259 switch(format) 1260 { 1261 #if S3TC_SUPPORT 1262 case FORMAT_DXT1: 1263 #endif 1264 case FORMAT_ETC1: 1265 case FORMAT_R11_EAC: 1266 case FORMAT_SIGNED_R11_EAC: 1267 case FORMAT_RGB8_ETC2: 1268 case FORMAT_SRGB8_ETC2: 1269 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1270 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 1271 return 8 * ((width + 3) / 4); // 64 bit per 4x4 block, computed per 4 rows 1272 case FORMAT_RG11_EAC: 1273 case FORMAT_SIGNED_RG11_EAC: 1274 case FORMAT_RGBA8_ETC2_EAC: 1275 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 1276 case FORMAT_RGBA_ASTC_4x4_KHR: 1277 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 1278 return 16 * ((width + 3) / 4); // 128 bit per 4x4 block, computed per 4 rows 1279 case FORMAT_RGBA_ASTC_5x4_KHR: 1280 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 1281 case FORMAT_RGBA_ASTC_5x5_KHR: 1282 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 1283 return 16 * ((width + 4) / 5); 1284 case FORMAT_RGBA_ASTC_6x5_KHR: 1285 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 1286 case FORMAT_RGBA_ASTC_6x6_KHR: 1287 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 1288 return 16 * ((width + 5) / 6); 1289 case FORMAT_RGBA_ASTC_8x5_KHR: 1290 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 1291 case FORMAT_RGBA_ASTC_8x6_KHR: 1292 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 1293 case FORMAT_RGBA_ASTC_8x8_KHR: 1294 case 
FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
		return 16 * ((width + 7) / 8);
	case FORMAT_RGBA_ASTC_10x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
	case FORMAT_RGBA_ASTC_10x6_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
	case FORMAT_RGBA_ASTC_10x8_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
	case FORMAT_RGBA_ASTC_10x10_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
		return 16 * ((width + 9) / 10);
	case FORMAT_RGBA_ASTC_12x10_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
	case FORMAT_RGBA_ASTC_12x12_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
		return 16 * ((width + 11) / 12);
#if S3TC_SUPPORT
	case FORMAT_DXT3:
	case FORMAT_DXT5:
		return 16 * ((width + 3) / 4);   // 128 bit per 4x4 block, computed per 4 rows
#endif
	case FORMAT_ATI1:
		return 2 * ((width + 3) / 4);    // 64 bit per 4x4 block, computed per row
	case FORMAT_ATI2:
		return 4 * ((width + 3) / 4);    // 128 bit per 4x4 block, computed per row
	case FORMAT_YV12_BT601:
	case FORMAT_YV12_BT709:
	case FORMAT_YV12_JFIF:
		// Planar YV12: the luma plane rows are 16-byte aligned.
		return align(width, 16);
	default:
		return bytes(format) * width;
	}
}

// Returns the row pitch in pixels (elements); 0 for formats with no
// per-element byte size (e.g. ASTC, FORMAT_NULL).
int Surface::pitchP(int width, Format format, bool target)
{
	int B = bytes(format);

	return B > 0 ?
	pitchB(width, format, target) / B : 0;
}

// Returns the size in bytes of one slice (2D layer) of the given dimensions.
// Render targets and depth/stencil surfaces round the height up to even.
// For block-compressed formats the row pitch already covers a block's worth
// of pixel rows, so the multiplier is the number of block rows.
int Surface::sliceB(int width, int height, Format format, bool target)
{
	if(target || isDepth(format) || isStencil(format))
	{
		height = ((height + 1) & ~1);
	}

	switch(format)
	{
#if S3TC_SUPPORT
	case FORMAT_DXT1:
	case FORMAT_DXT3:
	case FORMAT_DXT5:
#endif
	case FORMAT_ETC1:
	case FORMAT_R11_EAC:
	case FORMAT_SIGNED_R11_EAC:
	case FORMAT_RG11_EAC:
	case FORMAT_SIGNED_RG11_EAC:
	case FORMAT_RGB8_ETC2:
	case FORMAT_SRGB8_ETC2:
	case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:
	case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:
	case FORMAT_RGBA8_ETC2_EAC:
	case FORMAT_SRGB8_ALPHA8_ETC2_EAC:
	case FORMAT_RGBA_ASTC_4x4_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:
	case FORMAT_RGBA_ASTC_5x4_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:
		return pitchB(width, format, target) * ((height + 3) / 4);   // Pitch computed per 4 rows
	case FORMAT_RGBA_ASTC_5x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:
	case FORMAT_RGBA_ASTC_6x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:
	case FORMAT_RGBA_ASTC_8x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
	case FORMAT_RGBA_ASTC_10x5_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:
		return pitchB(width, format, target) * ((height + 4) / 5);   // Pitch computed per 5 rows
	case FORMAT_RGBA_ASTC_6x6_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:
	case FORMAT_RGBA_ASTC_8x6_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:
	case FORMAT_RGBA_ASTC_10x6_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:
		return pitchB(width, format, target) * ((height + 5) / 6);   // Pitch computed per 6 rows
	case FORMAT_RGBA_ASTC_8x8_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:
	case FORMAT_RGBA_ASTC_10x8_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:
		return pitchB(width, format, target) * ((height + 7) / 8);   // Pitch computed per 8
	rows
	case FORMAT_RGBA_ASTC_10x10_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:
	case FORMAT_RGBA_ASTC_12x10_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:
		return pitchB(width, format, target) * ((height + 9) / 10);   // Pitch computed per 10 rows
	case FORMAT_RGBA_ASTC_12x12_KHR:
	case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:
		return pitchB(width, format, target) * ((height + 11) / 12);   // Pitch computed per 12 rows
	case FORMAT_ATI1:
	case FORMAT_ATI2:
	default:
		return pitchB(width, format, target) * height;   // Pitch computed per row
	}
}

// Returns the slice size in pixels (elements); 0 for formats with no
// per-element byte size.
int Surface::sliceP(int width, int height, Format format, bool target)
{
	int B = bytes(format);

	return B > 0 ? sliceB(width, height, format, target) / B : 0;
}

// Converts/copies the source buffer's contents into the destination buffer,
// decoding compressed or packed source formats as needed. Used to propagate
// dirty external (client-visible) data into the internal representation.
void Surface::update(Buffer &destination, Buffer &source)
{
//	ASSERT(source.lock != LOCK_UNLOCKED);
//	ASSERT(destination.lock != LOCK_UNLOCKED);

	if(destination.buffer != source.buffer)
	{
		ASSERT(source.dirty && !destination.dirty);

		switch(source.format)
		{
		case FORMAT_R8G8B8:		decodeR8G8B8(destination, source);		break;   // FIXME: Check destination format
		case FORMAT_X1R5G5B5:	decodeX1R5G5B5(destination, source);	break;   // FIXME: Check destination format
		case FORMAT_A1R5G5B5:	decodeA1R5G5B5(destination, source);	break;   // FIXME: Check destination format
		case FORMAT_X4R4G4B4:	decodeX4R4G4B4(destination, source);	break;   // FIXME: Check destination format
		case FORMAT_A4R4G4B4:	decodeA4R4G4B4(destination, source);	break;   // FIXME: Check destination format
		case FORMAT_P8:			decodeP8(destination, source);			break;   // FIXME: Check destination format
#if S3TC_SUPPORT
		case FORMAT_DXT1:		decodeDXT1(destination, source);		break;   // FIXME: Check destination format
		case FORMAT_DXT3:		decodeDXT3(destination, source);		break;   // FIXME: Check destination format
		case FORMAT_DXT5:		decodeDXT5(destination,
source);		break;   // FIXME: Check destination format
#endif
		case FORMAT_ATI1:		decodeATI1(destination, source);		break;   // FIXME: Check destination format
		case FORMAT_ATI2:		decodeATI2(destination, source);		break;   // FIXME: Check destination format
		// EAC: third argument is the channel count, fourth is signedness.
		case FORMAT_R11_EAC:			decodeEAC(destination, source, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_SIGNED_R11_EAC:		decodeEAC(destination, source, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_RG11_EAC:			decodeEAC(destination, source, 2, false);	break;   // FIXME: Check destination format
		case FORMAT_SIGNED_RG11_EAC:	decodeEAC(destination, source, 2, true);	break;   // FIXME: Check destination format
		// ETC2: third argument is the number of alpha bits, fourth is sRGB.
		case FORMAT_ETC1:
		case FORMAT_RGB8_ETC2:			decodeETC2(destination, source, 0, false);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ETC2:			decodeETC2(destination, source, 0, true);	break;   // FIXME: Check destination format
		case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2:	decodeETC2(destination, source, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2:	decodeETC2(destination, source, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_RGBA8_ETC2_EAC:			decodeETC2(destination, source, 8, false);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ETC2_EAC:	decodeETC2(destination, source, 8, true);	break;   // FIXME: Check destination format
		// ASTC: arguments are block width, block height, block depth, sRGB.
		case FORMAT_RGBA_ASTC_4x4_KHR:	decodeASTC(destination, source, 4, 4, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_5x4_KHR:	decodeASTC(destination, source, 5, 4, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_5x5_KHR:	decodeASTC(destination, source, 5, 5, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_6x5_KHR:	decodeASTC(destination, source, 6, 5, 1, false);	break;   // FIXME: Check destination format
		case
		FORMAT_RGBA_ASTC_6x6_KHR:		decodeASTC(destination, source, 6, 6, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_8x5_KHR:	decodeASTC(destination, source, 8, 5, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_8x6_KHR:	decodeASTC(destination, source, 8, 6, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_8x8_KHR:	decodeASTC(destination, source, 8, 8, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_10x5_KHR:	decodeASTC(destination, source, 10, 5, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_10x6_KHR:	decodeASTC(destination, source, 10, 6, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_10x8_KHR:	decodeASTC(destination, source, 10, 8, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_10x10_KHR:	decodeASTC(destination, source, 10, 10, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_12x10_KHR:	decodeASTC(destination, source, 12, 10, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_RGBA_ASTC_12x12_KHR:	decodeASTC(destination, source, 12, 12, 1, false);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR:	decodeASTC(destination, source, 4, 4, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR:	decodeASTC(destination, source, 5, 4, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR:	decodeASTC(destination, source, 5, 5, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR:	decodeASTC(destination, source, 6, 5, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR:	decodeASTC(destination, source, 6, 6, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR:
		decodeASTC(destination, source, 8, 5, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR:	decodeASTC(destination, source, 8, 6, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR:	decodeASTC(destination, source, 8, 8, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR:	decodeASTC(destination, source, 10, 5, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR:	decodeASTC(destination, source, 10, 6, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR:	decodeASTC(destination, source, 10, 8, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR:	decodeASTC(destination, source, 10, 10, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR:	decodeASTC(destination, source, 12, 10, 1, true);	break;   // FIXME: Check destination format
		case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR:	decodeASTC(destination, source, 12, 12, 1, true);	break;   // FIXME: Check destination format
		default:	genericUpdate(destination, source);	break;
		}
	}
}

// Fallback conversion path: copies the overlapping region element by element,
// going through a floating-point Color when formats differ, or via a straight
// row memcpy when they match.
void Surface::genericUpdate(Buffer &destination, Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.buffer;
	unsigned char *destinationSlice = (unsigned char*)destination.buffer;

	// Only the region common to both buffers is transferred.
	int depth = min(destination.depth, source.depth);
	int height = min(destination.height, source.height);
	int width = min(destination.width, source.width);
	int rowBytes = width * source.bytes;

	for(int z = 0; z < depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < height; y++)
		{
			if(source.format == destination.format)
			{
				// Identical formats: fast path, raw row copy.
				memcpy(destinationRow, sourceRow, rowBytes);
			}
			else
			{
				// Differing formats: convert each element through Color<float>.
				unsigned char *sourceElement = sourceRow;
				unsigned char *destinationElement = destinationRow;

				for(int x = 0; x < width; x++)
				{
					Color<float> color = source.read(sourceElement);
					destination.write(destinationElement, color);

					sourceElement += source.bytes;
					destinationElement += destination.bytes;
				}
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}
}

// Expands 24-bit B,G,R byte triplets to 32-bit 0xAARRGGBB with opaque alpha.
// NOTE(review): the destination is written as packed 32-bit words — assumes a
// 4-byte destination format (see the FIXME at the call site in update()).
void Surface::decodeR8G8B8(Buffer &destination, const Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.buffer;
	unsigned char *destinationSlice = (unsigned char*)destination.buffer;

	for(int z = 0; z < destination.depth && z < source.depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < destination.height && y < source.height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < destination.width && x < source.width; x++)
			{
				// Memory order is B, G, R (little-endian 24-bit RGB).
				unsigned int b = sourceElement[0];
				unsigned int g = sourceElement[1];
				unsigned int r = sourceElement[2];

				*(unsigned int*)destinationElement = 0xFF000000 | (r << 16) | (g << 8) | (b << 0);

				sourceElement += source.bytes;
				destinationElement += destination.bytes;
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}
}

// Expands X1R5G5B5 to 32-bit 0xAARRGGBB with opaque alpha. The fixed-point
// multipliers scale each 5-bit channel to 8 bits (x * 255 / 31) with rounding.
void Surface::decodeX1R5G5B5(Buffer &destination, const Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.buffer;
	unsigned char *destinationSlice = (unsigned char*)destination.buffer;

	for(int z = 0; z <
	destination.depth && z < source.depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < destination.height && y < source.height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < destination.width && x < source.width; x++)
			{
				unsigned int xrgb = *(unsigned short*)sourceElement;

				unsigned int r = (((xrgb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
				unsigned int g = (((xrgb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
				unsigned int b = (((xrgb & 0x001F) * 2106 + 0x80) >> 8);

				*(unsigned int*)destinationElement = 0xFF000000 | r | g | b;

				sourceElement += source.bytes;
				destinationElement += destination.bytes;
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}
}

// Expands A1R5G5B5 to 32-bit 0xAARRGGBB; the 1-bit alpha becomes 0x00 or 0xFF.
void Surface::decodeA1R5G5B5(Buffer &destination, const Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.buffer;
	unsigned char *destinationSlice = (unsigned char*)destination.buffer;

	for(int z = 0; z < destination.depth && z < source.depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < destination.height && y < source.height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < destination.width && x < source.width; x++)
			{
				unsigned int argb = *(unsigned short*)sourceElement;

				// 0x8000 * 130560 == 0xFF000000: replicate the alpha bit to 8 bits.
				unsigned int a = (argb & 0x8000) * 130560;
				unsigned int r = (((argb & 0x7C00) * 134771 + 0x800000) >> 8) & 0x00FF0000;
				unsigned int g = (((argb & 0x03E0) * 16846 + 0x8000) >> 8) & 0x0000FF00;
				unsigned int b = (((argb & 0x001F) * 2106 + 0x80) >> 8);

				*(unsigned int*)destinationElement = a | r | g | b;

				sourceElement += source.bytes;
				destinationElement += destination.bytes;
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}
}

// Expands X4R4G4B4 to 32-bit 0xAARRGGBB with opaque alpha; each 4-bit channel
// is replicated into both nibbles of the 8-bit result (x * 0x11).
void Surface::decodeX4R4G4B4(Buffer &destination, const Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.buffer;
	unsigned char *destinationSlice = (unsigned char*)destination.buffer;

	for(int z = 0; z < destination.depth && z < source.depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < destination.height && y < source.height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < destination.width && x < source.width; x++)
			{
				unsigned int xrgb = *(unsigned short*)sourceElement;

				unsigned int r = ((xrgb & 0x0F00) * 0x00001100) & 0x00FF0000;
				unsigned int g = ((xrgb & 0x00F0) * 0x00000110) & 0x0000FF00;
				unsigned int b = (xrgb & 0x000F) * 0x00000011;

				*(unsigned int*)destinationElement = 0xFF000000 | r | g | b;

				sourceElement += source.bytes;
				destinationElement += destination.bytes;
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}
}

// Expands A4R4G4B4 to 32-bit 0xAARRGGBB; nibble replication as in X4R4G4B4.
void Surface::decodeA4R4G4B4(Buffer &destination, const Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.buffer;
	unsigned char *destinationSlice = (unsigned char*)destination.buffer;

	for(int z = 0; z < destination.depth && z < source.depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < destination.height && y < source.height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < destination.width && x < source.width; x++)
			{
				unsigned int argb = *(unsigned short*)sourceElement;

				unsigned int a = ((argb & 0xF000) * 0x00011000) & 0xFF000000;
				unsigned int r = ((argb & 0x0F00) * 0x00001100) & 0x00FF0000;
				unsigned int g = ((argb & 0x00F0) * 0x00000110) & 0x0000FF00;
				unsigned int b = (argb & 0x000F) * 0x00000011;

				*(unsigned int*)destinationElement = a | r | g | b;

				sourceElement += source.bytes;
				destinationElement += destination.bytes;
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}
}

// Expands 8-bit palette indices through the shared Surface::palette (A8B8G8R8
// entries) into 32-bit 0xAARRGGBB, swapping the R and B channels.
void Surface::decodeP8(Buffer &destination, const Buffer &source)
{
	unsigned char *sourceSlice = (unsigned char*)source.buffer;
	unsigned char *destinationSlice = (unsigned char*)destination.buffer;

	for(int z = 0; z < destination.depth && z < source.depth; z++)
	{
		unsigned char *sourceRow = sourceSlice;
		unsigned char *destinationRow = destinationSlice;

		for(int y = 0; y < destination.height && y < source.height; y++)
		{
			unsigned char *sourceElement = sourceRow;
			unsigned char *destinationElement = destinationRow;

			for(int x = 0; x < destination.width && x < source.width; x++)
			{
				unsigned int abgr = palette[*(unsigned char*)sourceElement];

				// Swap R and B to convert the ABGR palette entry to ARGB.
				unsigned int r = (abgr & 0x000000FF) << 16;
				unsigned int g = (abgr & 0x0000FF00) << 0;
				unsigned int b = (abgr & 0x00FF0000) >> 16;
				unsigned int a = (abgr & 0xFF000000) >> 0;

				*(unsigned int*)destinationElement
				= a | r | g | b;

				sourceElement += source.bytes;
				destinationElement += destination.bytes;
			}

			sourceRow += source.pitchB;
			destinationRow += destination.pitchB;
		}

		sourceSlice += source.sliceB;
		destinationSlice += destination.sliceB;
	}
}

#if S3TC_SUPPORT
// Decodes DXT1 (BC1) blocks into 32-bit pixels. Each 4x4 block stores two
// RGB565 endpoint colors and a 2-bit-per-texel lookup table; the endpoint
// ordering selects between the opaque and 1-bit-transparent block modes.
void Surface::decodeDXT1(Buffer &internal, const Buffer &external)
{
	unsigned int *destSlice = (unsigned int*)internal.buffer;
	const DXT1 *source = (const DXT1*)external.buffer;

	for(int z = 0; z < external.depth; z++)
	{
		unsigned int *dest = destSlice;

		for(int y = 0; y < external.height; y += 4)
		{
			for(int x = 0; x < external.width; x += 4)
			{
				Color<byte> c[4];

				c[0] = source->c0;
				c[1] = source->c1;

				if(source->c0 > source->c1)   // No transparency
				{
					// c2 = 2 / 3 * c0 + 1 / 3 * c1
					c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
					c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
					c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);
					c[2].a = 0xFF;

					// c3 = 1 / 3 * c0 + 2 / 3 * c1
					c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
					c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
					c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);
					c[3].a = 0xFF;
				}
				else   // c3 transparent
				{
					// c2 = 1 / 2 * c0 + 1 / 2 * c1
					c[2].r = (byte)(((word)c[0].r + (word)c[1].r) / 2);
					c[2].g = (byte)(((word)c[0].g + (word)c[1].g) / 2);
					c[2].b = (byte)(((word)c[0].b + (word)c[1].b) / 2);
					c[2].a = 0xFF;

					c[3].r = 0;
					c[3].g = 0;
					c[3].b = 0;
					c[3].a = 0;
				}

				// Clamp against the internal size so partial edge blocks don't
				// write outside the destination.
				for(int j = 0; j < 4 && (y + j) < internal.height; j++)
				{
					for(int i = 0; i < 4 && (x + i) < internal.width; i++)
					{
						dest[(x + i) + (y + j) * internal.width] = c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4];
					}
				}

				source++;
			}
		}

		(byte*&)destSlice += internal.sliceB;
	}
}

// Decodes DXT3 (BC2) blocks: explicit 4-bit alpha per texel plus a DXT1-style
// color block that always uses the two-interpolant (opaque) mode.
void Surface::decodeDXT3(Buffer &internal, const Buffer &external)
{
	unsigned int *destSlice = (unsigned int*)internal.buffer;
	const DXT3 *source = (const DXT3*)external.buffer;

	for(int z = 0; z < external.depth; z++)
	{
		unsigned int *dest = destSlice;

		for(int y = 0; y < external.height; y += 4)
		{
			for(int x = 0; x < external.width; x += 4)
			{
				Color<byte> c[4];

				c[0] = source->c0;
				c[1] = source->c1;

				// c2 = 2 / 3 * c0 + 1 / 3 * c1
				c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
				c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
				c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);

				// c3 = 1 / 3 * c0 + 2 / 3 * c1
				c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
				c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
				c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);

				for(int j = 0; j < 4 && (y + j) < internal.height; j++)
				{
					for(int i = 0; i < 4 && (x + i) < internal.width; i++)
					{
						unsigned int a = (unsigned int)(source->a >> 4 * (i + j * 4)) & 0x0F;
						// (a << 28) + (a << 24) replicates the 4-bit alpha into
						// both nibbles of the 8-bit alpha channel.
						unsigned int color = (c[(unsigned int)(source->lut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | ((a << 28) + (a << 24));

						dest[(x + i) + (y + j) * internal.width] = color;
					}
				}

				source++;
			}
		}

		(byte*&)destSlice += internal.sliceB;
	}
}

// Decodes DXT5 (BC3) blocks: interpolated 3-bit-indexed alpha (two endpoint
// alphas, 8 derived values) plus a DXT1-style opaque color block.
void Surface::decodeDXT5(Buffer &internal, const Buffer &external)
{
	unsigned int *destSlice = (unsigned int*)internal.buffer;
	const DXT5 *source = (const DXT5*)external.buffer;

	for(int z = 0; z < external.depth; z++)
	{
		unsigned int *dest = destSlice;

		for(int y = 0; y < external.height; y += 4)
		{
			for(int x = 0; x < external.width; x += 4)
			{
				Color<byte> c[4];

				c[0] = source->c0;
				c[1] = source->c1;

				// c2 = 2 / 3 * c0 + 1 / 3 * c1
				c[2].r = (byte)((2 * (word)c[0].r + (word)c[1].r + 1) / 3);
				c[2].g = (byte)((2 * (word)c[0].g + (word)c[1].g + 1) / 3);
				c[2].b = (byte)((2 * (word)c[0].b + (word)c[1].b + 1) / 3);

				// c3 = 1 / 3 * c0 + 2 / 3 * c1
				c[3].r = (byte)(((word)c[0].r + 2 * (word)c[1].r + 1) / 3);
				c[3].g = (byte)(((word)c[0].g + 2 * (word)c[1].g + 1) / 3);
				c[3].b = (byte)(((word)c[0].b + 2 * (word)c[1].b + 1) / 3);

				byte a[8];

				a[0] = source->a0;
				a[1] = source->a1;

				if(a[0] > a[1])
				{
					// Eight-alpha mode: six interpolated values.
					a[2] = (byte)((6 * (word)a[0] + 1 * (word)a[1] + 3) / 7);
					a[3] = (byte)((5 * (word)a[0] + 2 * (word)a[1] + 3) / 7);
					a[4] = (byte)((4 * (word)a[0] + 3 * (word)a[1] + 3) / 7);
					a[5] = (byte)((3 * (word)a[0] + 4 * (word)a[1] + 3) / 7);
					a[6] = (byte)((2 * (word)a[0] + 5 * (word)a[1] + 3) / 7);
					a[7] = (byte)((1 * (word)a[0] + 6 * (word)a[1] + 3) / 7);
				}
				else
				{
					// Six-alpha mode: four interpolated values plus 0 and 255.
					a[2] = (byte)((4 * (word)a[0] + 1 * (word)a[1] + 2) / 5);
					a[3] = (byte)((3 * (word)a[0] + 2 * (word)a[1] + 2) / 5);
					a[4] = (byte)((2 * (word)a[0] + 3 * (word)a[1] + 2) / 5);
					a[5] = (byte)((1 * (word)a[0] + 4 * (word)a[1] + 2) / 5);
					a[6] = 0;
					a[7] = 0xFF;
				}

				for(int j = 0; j < 4 && (y + j) < internal.height; j++)
				{
					for(int i = 0; i < 4 && (x + i) < internal.width; i++)
					{
						// The 48-bit alpha LUT starts 16 bits into 'alut'.
						unsigned int alpha = (unsigned int)a[(unsigned int)(source->alut >> (16 + 3 * (i + j * 4))) % 8] << 24;
						unsigned int color = (c[(source->clut >> 2 * (i + j * 4)) % 4] & 0x00FFFFFF) | alpha;

						dest[(x + i) + (y + j) * internal.width] = color;
					}
				}

				source++;
			}
		}

		(byte*&)destSlice += internal.sliceB;
	}
}
#endif

// Decodes ATI1 (BC4) blocks: a single channel encoded like DXT5 alpha,
// written out as 8-bit values.
void Surface::decodeATI1(Buffer &internal, const Buffer &external)
{
	byte *destSlice = (byte*)internal.buffer;
	const ATI1 *source
= (const ATI1*)external.buffer; 1936 1937 for(int z = 0; z < external.depth; z++) 1938 { 1939 byte *dest = destSlice; 1940 1941 for(int y = 0; y < external.height; y += 4) 1942 { 1943 for(int x = 0; x < external.width; x += 4) 1944 { 1945 byte r[8]; 1946 1947 r[0] = source->r0; 1948 r[1] = source->r1; 1949 1950 if(r[0] > r[1]) 1951 { 1952 r[2] = (byte)((6 * (word)r[0] + 1 * (word)r[1] + 3) / 7); 1953 r[3] = (byte)((5 * (word)r[0] + 2 * (word)r[1] + 3) / 7); 1954 r[4] = (byte)((4 * (word)r[0] + 3 * (word)r[1] + 3) / 7); 1955 r[5] = (byte)((3 * (word)r[0] + 4 * (word)r[1] + 3) / 7); 1956 r[6] = (byte)((2 * (word)r[0] + 5 * (word)r[1] + 3) / 7); 1957 r[7] = (byte)((1 * (word)r[0] + 6 * (word)r[1] + 3) / 7); 1958 } 1959 else 1960 { 1961 r[2] = (byte)((4 * (word)r[0] + 1 * (word)r[1] + 2) / 5); 1962 r[3] = (byte)((3 * (word)r[0] + 2 * (word)r[1] + 2) / 5); 1963 r[4] = (byte)((2 * (word)r[0] + 3 * (word)r[1] + 2) / 5); 1964 r[5] = (byte)((1 * (word)r[0] + 4 * (word)r[1] + 2) / 5); 1965 r[6] = 0; 1966 r[7] = 0xFF; 1967 } 1968 1969 for(int j = 0; j < 4 && (y + j) < internal.height; j++) 1970 { 1971 for(int i = 0; i < 4 && (x + i) < internal.width; i++) 1972 { 1973 dest[(x + i) + (y + j) * internal.width] = r[(unsigned int)(source->rlut >> (16 + 3 * (i + j * 4))) % 8]; 1974 } 1975 } 1976 1977 source++; 1978 } 1979 } 1980 1981 destSlice += internal.sliceB; 1982 } 1983 } 1984 1985 void Surface::decodeATI2(Buffer &internal, const Buffer &external) 1986 { 1987 word *destSlice = (word*)internal.buffer; 1988 const ATI2 *source = (const ATI2*)external.buffer; 1989 1990 for(int z = 0; z < external.depth; z++) 1991 { 1992 word *dest = destSlice; 1993 1994 for(int y = 0; y < external.height; y += 4) 1995 { 1996 for(int x = 0; x < external.width; x += 4) 1997 { 1998 byte X[8]; 1999 2000 X[0] = source->x0; 2001 X[1] = source->x1; 2002 2003 if(X[0] > X[1]) 2004 { 2005 X[2] = (byte)((6 * (word)X[0] + 1 * (word)X[1] + 3) / 7); 2006 X[3] = (byte)((5 * (word)X[0] + 2 * (word)X[1] + 3) / 7); 
					X[4] = (byte)((4 * (word)X[0] + 3 * (word)X[1] + 3) / 7);
					X[5] = (byte)((3 * (word)X[0] + 4 * (word)X[1] + 3) / 7);
					X[6] = (byte)((2 * (word)X[0] + 5 * (word)X[1] + 3) / 7);
					X[7] = (byte)((1 * (word)X[0] + 6 * (word)X[1] + 3) / 7);
				}
				else
				{
					// x0 <= x1: four interpolated values plus explicit 0 and 0xFF endpoints.
					X[2] = (byte)((4 * (word)X[0] + 1 * (word)X[1] + 2) / 5);
					X[3] = (byte)((3 * (word)X[0] + 2 * (word)X[1] + 2) / 5);
					X[4] = (byte)((2 * (word)X[0] + 3 * (word)X[1] + 2) / 5);
					X[5] = (byte)((1 * (word)X[0] + 4 * (word)X[1] + 2) / 5);
					X[6] = 0;
					X[7] = 0xFF;
				}

				// Second channel: same 8-entry palette construction from y0/y1.
				byte Y[8];

				Y[0] = source->y0;
				Y[1] = source->y1;

				if(Y[0] > Y[1])
				{
					// y0 > y1: six interpolated values between the two endpoints.
					Y[2] = (byte)((6 * (word)Y[0] + 1 * (word)Y[1] + 3) / 7);
					Y[3] = (byte)((5 * (word)Y[0] + 2 * (word)Y[1] + 3) / 7);
					Y[4] = (byte)((4 * (word)Y[0] + 3 * (word)Y[1] + 3) / 7);
					Y[5] = (byte)((3 * (word)Y[0] + 4 * (word)Y[1] + 3) / 7);
					Y[6] = (byte)((2 * (word)Y[0] + 5 * (word)Y[1] + 3) / 7);
					Y[7] = (byte)((1 * (word)Y[0] + 6 * (word)Y[1] + 3) / 7);
				}
				else
				{
					Y[2] = (byte)((4 * (word)Y[0] + 1 * (word)Y[1] + 2) / 5);
					Y[3] = (byte)((3 * (word)Y[0] + 2 * (word)Y[1] + 2) / 5);
					Y[4] = (byte)((2 * (word)Y[0] + 3 * (word)Y[1] + 2) / 5);
					Y[5] = (byte)((1 * (word)Y[0] + 4 * (word)Y[1] + 2) / 5);
					Y[6] = 0;
					Y[7] = 0xFF;
				}

				// Expand the 4x4 block; 3-bit palette indices are read from bit 16
				// upward of each channel's 64-bit lookup field. Clamp against the
				// internal dimensions for partial edge blocks.
				for(int j = 0; j < 4 && (y + j) < internal.height; j++)
				{
					for(int i = 0; i < 4 && (x + i) < internal.width; i++)
					{
						word r = X[(unsigned int)(source->xlut >> (16 + 3 * (i + j * 4))) % 8];
						word g = Y[(unsigned int)(source->ylut >> (16 + 3 * (i + j * 4))) % 8];

						// Pack as 8-bit R in the low byte, 8-bit G in the high byte.
						dest[(x + i) + (y + j) * internal.width] = (g << 8) + r;
					}
				}

				source++;
			}
		}

		(byte*&)destSlice += internal.sliceB;
	}
}

// Decodes ETC2-compressed data from external into internal, optionally
// converting the decoded color channels from sRGB to linear in place.
void Surface::decodeETC2(Buffer &internal, const Buffer &external, int nbAlphaBits, bool isSRGB)
{
	ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
	                    (nbAlphaBits == 8) ? ETC_Decoder::ETC_RGBA : ((nbAlphaBits == 1) ? ETC_Decoder::ETC_RGB_PUNCHTHROUGH_ALPHA : ETC_Decoder::ETC_RGB));

	if(isSRGB)
	{
		// Lazily-built 256-entry sRGB-to-linear table.
		// NOTE(review): the dirty-flag initialization is not thread-safe if
		// multiple surfaces decode concurrently — confirm single-threaded use.
		static byte sRGBtoLinearTable[256];
		static bool sRGBtoLinearTableDirty = true;
		if(sRGBtoLinearTableDirty)
		{
			for(int i = 0; i < 256; i++)
			{
				sRGBtoLinearTable[i] = static_cast<byte>(sRGBtoLinear(static_cast<float>(i) / 255.0f) * 255.0f + 0.5f);
			}
			sRGBtoLinearTableDirty = false;
		}

		// Perform sRGB conversion in place after decoding
		// (only the first three bytes of each pixel; alpha is left untouched).
		byte* src = (byte*)internal.buffer;
		for(int y = 0; y < internal.height; y++)
		{
			byte* srcRow = src + y * internal.pitchB;
			for(int x = 0; x < internal.width; x++)
			{
				byte* srcPix = srcRow + x * internal.bytes;
				for(int i = 0; i < 3; i++)
				{
					srcPix[i] = sRGBtoLinearTable[srcPix[i]];
				}
			}
		}
	}
}

// Decodes EAC-compressed one- or two-channel data from external into internal.
void Surface::decodeEAC(Buffer &internal, const Buffer &external, int nbChannels, bool isSigned)
{
	ASSERT(nbChannels == 1 || nbChannels == 2);

	ETC_Decoder::Decode((const byte*)external.buffer, (byte*)internal.buffer, external.width, external.height, internal.width, internal.height, internal.pitchB, internal.bytes,
	                    (nbChannels == 1) ? (isSigned ? ETC_Decoder::ETC_R_SIGNED : ETC_Decoder::ETC_R_UNSIGNED) : (isSigned ?
ETC_Decoder::ETC_RG_SIGNED : ETC_Decoder::ETC_RG_UNSIGNED)); 2106 2107 // FIXME: We convert signed data to float, until signed integer internal formats are supported 2108 // This code can be removed if signed ETC2 images are decoded to internal 8 bit signed R/RG formats 2109 if(isSigned) 2110 { 2111 sbyte* src = (sbyte*)internal.buffer; 2112 2113 for(int y = 0; y < internal.height; y++) 2114 { 2115 sbyte* srcRow = src + y * internal.pitchB; 2116 for(int x = internal.width - 1; x >= 0; x--) 2117 { 2118 int dx = x & 0xFFFFFFFC; 2119 int mx = x - dx; 2120 sbyte* srcPix = srcRow + dx * internal.bytes + mx * nbChannels; 2121 float* dstPix = (float*)(srcRow + x * internal.bytes); 2122 for(int c = nbChannels - 1; c >= 0; c--) 2123 { 2124 static const float normalization = 1.0f / 127.875f; 2125 dstPix[c] = clamp(static_cast<float>(srcPix[c]) * normalization, -1.0f, 1.0f); 2126 } 2127 } 2128 } 2129 } 2130 } 2131 2132 void Surface::decodeASTC(Buffer &internal, const Buffer &external, int xBlockSize, int yBlockSize, int zBlockSize, bool isSRGB) 2133 { 2134 } 2135 2136 unsigned int Surface::size(int width, int height, int depth, Format format) 2137 { 2138 // Dimensions rounded up to multiples of 4, used for compressed formats 2139 int width4 = align(width, 4); 2140 int height4 = align(height, 4); 2141 2142 switch(format) 2143 { 2144 #if S3TC_SUPPORT 2145 case FORMAT_DXT1: 2146 #endif 2147 case FORMAT_ATI1: 2148 case FORMAT_ETC1: 2149 case FORMAT_R11_EAC: 2150 case FORMAT_SIGNED_R11_EAC: 2151 case FORMAT_RGB8_ETC2: 2152 case FORMAT_SRGB8_ETC2: 2153 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2154 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2155 return width4 * height4 * depth / 2; 2156 #if S3TC_SUPPORT 2157 case FORMAT_DXT3: 2158 case FORMAT_DXT5: 2159 #endif 2160 case FORMAT_ATI2: 2161 case FORMAT_RG11_EAC: 2162 case FORMAT_SIGNED_RG11_EAC: 2163 case FORMAT_RGBA8_ETC2_EAC: 2164 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2165 case FORMAT_RGBA_ASTC_4x4_KHR: 2166 case 
FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2167 return width4 * height4 * depth; 2168 case FORMAT_RGBA_ASTC_5x4_KHR: 2169 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2170 return align(width, 5) * height4 * depth; 2171 case FORMAT_RGBA_ASTC_5x5_KHR: 2172 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2173 return align(width, 5) * align(height, 5) * depth; 2174 case FORMAT_RGBA_ASTC_6x5_KHR: 2175 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2176 return align(width, 6) * align(height, 5) * depth; 2177 case FORMAT_RGBA_ASTC_6x6_KHR: 2178 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2179 return align(width, 6) * align(height, 6) * depth; 2180 case FORMAT_RGBA_ASTC_8x5_KHR: 2181 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2182 return align(width, 8) * align(height, 5) * depth; 2183 case FORMAT_RGBA_ASTC_8x6_KHR: 2184 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2185 return align(width, 8) * align(height, 6) * depth; 2186 case FORMAT_RGBA_ASTC_8x8_KHR: 2187 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2188 return align(width, 8) * align(height, 8) * depth; 2189 case FORMAT_RGBA_ASTC_10x5_KHR: 2190 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2191 return align(width, 10) * align(height, 5) * depth; 2192 case FORMAT_RGBA_ASTC_10x6_KHR: 2193 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2194 return align(width, 10) * align(height, 6) * depth; 2195 case FORMAT_RGBA_ASTC_10x8_KHR: 2196 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2197 return align(width, 10) * align(height, 8) * depth; 2198 case FORMAT_RGBA_ASTC_10x10_KHR: 2199 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2200 return align(width, 10) * align(height, 10) * depth; 2201 case FORMAT_RGBA_ASTC_12x10_KHR: 2202 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2203 return align(width, 12) * align(height, 10) * depth; 2204 case FORMAT_RGBA_ASTC_12x12_KHR: 2205 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2206 return align(width, 12) * align(height, 12) * depth; 2207 case FORMAT_YV12_BT601: 2208 case FORMAT_YV12_BT709: 2209 case FORMAT_YV12_JFIF: 2210 { 2211 unsigned int YStride = align(width, 16); 
			unsigned int YSize = YStride * height;
			unsigned int CStride = align(YStride / 2, 16);
			unsigned int CSize = CStride * height / 2;   // NOTE(review): assumes an even height — confirm behavior for odd-sized YV12 surfaces

			// One full-resolution Y plane followed by two half-resolution chroma planes.
			return YSize + 2 * CSize;
		}
	default:
		// Uncompressed formats: plain bytes-per-pixel times the volume.
		return bytes(format) * width * height * depth;
	}

	return 0;   // Unreachable: every switch path above returns.
}

// Returns true if the format holds stencil bits.
bool Surface::isStencil(Format format)
{
	switch(format)
	{
	case FORMAT_D32:
	case FORMAT_D16:
	case FORMAT_D24X8:
	case FORMAT_D32F:
	case FORMAT_D32F_COMPLEMENTARY:
	case FORMAT_D32F_LOCKABLE:
		return false;   // depth-only formats
	case FORMAT_D24S8:
	case FORMAT_D24FS8:
	case FORMAT_S8:   // stencil-only
	case FORMAT_DF24S8:
	case FORMAT_DF16S8:
	case FORMAT_D32FS8_TEXTURE:
	case FORMAT_D32FS8_SHADOW:
	case FORMAT_INTZ:
		return true;
	default:
		return false;
	}
}

// Returns true if the format holds depth bits.
bool Surface::isDepth(Format format)
{
	switch(format)
	{
	case FORMAT_D32:
	case FORMAT_D16:
	case FORMAT_D24X8:
	case FORMAT_D24S8:
	case FORMAT_D24FS8:
	case FORMAT_D32F:
	case FORMAT_D32F_COMPLEMENTARY:
	case FORMAT_D32F_LOCKABLE:
	case FORMAT_DF24S8:
	case FORMAT_DF16S8:
	case FORMAT_D32FS8_TEXTURE:
	case FORMAT_D32FS8_SHADOW:
	case FORMAT_INTZ:
		return true;
	case FORMAT_S8:   // stencil-only; listed explicitly for clarity
		return false;
	default:
		return false;
	}
}

// Returns true for palettized formats.
bool Surface::isPalette(Format format)
{
	switch(format)
	{
	case FORMAT_P8:
	case FORMAT_A8P8:
		return true;
	default:
		return false;
	}
}

// Returns true if the format stores its components as floating-point values.
bool Surface::isFloatFormat(Format format)
{
	switch(format)
	{
	case FORMAT_R5G6B5:
	case FORMAT_X8R8G8B8:
	case FORMAT_X8B8G8R8I:
	case FORMAT_X8B8G8R8:
	case FORMAT_A8R8G8B8:
	case FORMAT_A8B8G8R8I:
	case FORMAT_R8UI:
	case FORMAT_G8R8UI:
	case FORMAT_X8B8G8R8UI:
	case FORMAT_A8B8G8R8UI:
	case FORMAT_A8B8G8R8:
	case FORMAT_G8R8I:
	case FORMAT_G8R8:
	case
FORMAT_R8I_SNORM: 2305 case FORMAT_G8R8I_SNORM: 2306 case FORMAT_X8B8G8R8I_SNORM: 2307 case FORMAT_A8B8G8R8I_SNORM: 2308 case FORMAT_R16I: 2309 case FORMAT_R16UI: 2310 case FORMAT_G16R16I: 2311 case FORMAT_G16R16UI: 2312 case FORMAT_G16R16: 2313 case FORMAT_X16B16G16R16I: 2314 case FORMAT_X16B16G16R16UI: 2315 case FORMAT_A16B16G16R16I: 2316 case FORMAT_A16B16G16R16UI: 2317 case FORMAT_A16B16G16R16: 2318 case FORMAT_V8U8: 2319 case FORMAT_Q8W8V8U8: 2320 case FORMAT_X8L8V8U8: 2321 case FORMAT_V16U16: 2322 case FORMAT_A16W16V16U16: 2323 case FORMAT_Q16W16V16U16: 2324 case FORMAT_A8: 2325 case FORMAT_R8I: 2326 case FORMAT_R8: 2327 case FORMAT_L8: 2328 case FORMAT_L16: 2329 case FORMAT_A8L8: 2330 case FORMAT_YV12_BT601: 2331 case FORMAT_YV12_BT709: 2332 case FORMAT_YV12_JFIF: 2333 case FORMAT_R32I: 2334 case FORMAT_R32UI: 2335 case FORMAT_G32R32I: 2336 case FORMAT_G32R32UI: 2337 case FORMAT_X32B32G32R32I: 2338 case FORMAT_X32B32G32R32UI: 2339 case FORMAT_A32B32G32R32I: 2340 case FORMAT_A32B32G32R32UI: 2341 return false; 2342 case FORMAT_R32F: 2343 case FORMAT_G32R32F: 2344 case FORMAT_A32B32G32R32F: 2345 case FORMAT_D32F: 2346 case FORMAT_D32F_COMPLEMENTARY: 2347 case FORMAT_D32F_LOCKABLE: 2348 case FORMAT_D32FS8_TEXTURE: 2349 case FORMAT_D32FS8_SHADOW: 2350 case FORMAT_L16F: 2351 case FORMAT_A16L16F: 2352 case FORMAT_L32F: 2353 case FORMAT_A32L32F: 2354 return true; 2355 default: 2356 ASSERT(false); 2357 } 2358 2359 return false; 2360 } 2361 2362 bool Surface::isUnsignedComponent(Format format, int component) 2363 { 2364 switch(format) 2365 { 2366 case FORMAT_NULL: 2367 case FORMAT_R5G6B5: 2368 case FORMAT_X8R8G8B8: 2369 case FORMAT_X8B8G8R8: 2370 case FORMAT_A8R8G8B8: 2371 case FORMAT_A8B8G8R8: 2372 case FORMAT_G8R8: 2373 case FORMAT_G16R16: 2374 case FORMAT_A16B16G16R16: 2375 case FORMAT_D32F: 2376 case FORMAT_D32F_COMPLEMENTARY: 2377 case FORMAT_D32F_LOCKABLE: 2378 case FORMAT_D32FS8_TEXTURE: 2379 case FORMAT_D32FS8_SHADOW: 2380 case FORMAT_A8: 2381 case FORMAT_R8: 
2382 case FORMAT_L8: 2383 case FORMAT_L16: 2384 case FORMAT_A8L8: 2385 case FORMAT_YV12_BT601: 2386 case FORMAT_YV12_BT709: 2387 case FORMAT_YV12_JFIF: 2388 return true; 2389 case FORMAT_V8U8: 2390 case FORMAT_X8L8V8U8: 2391 case FORMAT_V16U16: 2392 if(component < 2) 2393 { 2394 return false; 2395 } 2396 else 2397 { 2398 return true; 2399 } 2400 case FORMAT_A16W16V16U16: 2401 if(component < 3) 2402 { 2403 return false; 2404 } 2405 else 2406 { 2407 return true; 2408 } 2409 case FORMAT_Q8W8V8U8: 2410 case FORMAT_Q16W16V16U16: 2411 return false; 2412 case FORMAT_R32F: 2413 if(component < 1) 2414 { 2415 return false; 2416 } 2417 else 2418 { 2419 return true; 2420 } 2421 case FORMAT_G32R32F: 2422 if(component < 2) 2423 { 2424 return false; 2425 } 2426 else 2427 { 2428 return true; 2429 } 2430 case FORMAT_A32B32G32R32F: 2431 return false; 2432 default: 2433 ASSERT(false); 2434 } 2435 2436 return false; 2437 } 2438 2439 bool Surface::isSRGBreadable(Format format) 2440 { 2441 // Keep in sync with Capabilities::isSRGBreadable 2442 switch(format) 2443 { 2444 case FORMAT_L8: 2445 case FORMAT_A8L8: 2446 case FORMAT_R8G8B8: 2447 case FORMAT_A8R8G8B8: 2448 case FORMAT_X8R8G8B8: 2449 case FORMAT_A8B8G8R8: 2450 case FORMAT_X8B8G8R8: 2451 case FORMAT_R5G6B5: 2452 case FORMAT_X1R5G5B5: 2453 case FORMAT_A1R5G5B5: 2454 case FORMAT_A4R4G4B4: 2455 #if S3TC_SUPPORT 2456 case FORMAT_DXT1: 2457 case FORMAT_DXT3: 2458 case FORMAT_DXT5: 2459 #endif 2460 case FORMAT_ATI1: 2461 case FORMAT_ATI2: 2462 return true; 2463 default: 2464 return false; 2465 } 2466 2467 return false; 2468 } 2469 2470 bool Surface::isSRGBwritable(Format format) 2471 { 2472 // Keep in sync with Capabilities::isSRGBwritable 2473 switch(format) 2474 { 2475 case FORMAT_NULL: 2476 case FORMAT_A8R8G8B8: 2477 case FORMAT_X8R8G8B8: 2478 case FORMAT_A8B8G8R8: 2479 case FORMAT_X8B8G8R8: 2480 case FORMAT_R5G6B5: 2481 return true; 2482 default: 2483 return false; 2484 } 2485 } 2486 2487 bool Surface::isCompressed(Format format) 
2488 { 2489 switch(format) 2490 { 2491 #if S3TC_SUPPORT 2492 case FORMAT_DXT1: 2493 case FORMAT_DXT3: 2494 case FORMAT_DXT5: 2495 #endif 2496 case FORMAT_ATI1: 2497 case FORMAT_ATI2: 2498 case FORMAT_ETC1: 2499 case FORMAT_R11_EAC: 2500 case FORMAT_SIGNED_R11_EAC: 2501 case FORMAT_RG11_EAC: 2502 case FORMAT_SIGNED_RG11_EAC: 2503 case FORMAT_RGB8_ETC2: 2504 case FORMAT_SRGB8_ETC2: 2505 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2506 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 2507 case FORMAT_RGBA8_ETC2_EAC: 2508 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 2509 case FORMAT_RGBA_ASTC_4x4_KHR: 2510 case FORMAT_RGBA_ASTC_5x4_KHR: 2511 case FORMAT_RGBA_ASTC_5x5_KHR: 2512 case FORMAT_RGBA_ASTC_6x5_KHR: 2513 case FORMAT_RGBA_ASTC_6x6_KHR: 2514 case FORMAT_RGBA_ASTC_8x5_KHR: 2515 case FORMAT_RGBA_ASTC_8x6_KHR: 2516 case FORMAT_RGBA_ASTC_8x8_KHR: 2517 case FORMAT_RGBA_ASTC_10x5_KHR: 2518 case FORMAT_RGBA_ASTC_10x6_KHR: 2519 case FORMAT_RGBA_ASTC_10x8_KHR: 2520 case FORMAT_RGBA_ASTC_10x10_KHR: 2521 case FORMAT_RGBA_ASTC_12x10_KHR: 2522 case FORMAT_RGBA_ASTC_12x12_KHR: 2523 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 2524 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 2525 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 2526 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 2527 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 2528 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 2529 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 2530 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 2531 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 2532 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 2533 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 2534 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 2535 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 2536 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 2537 return true; 2538 default: 2539 return false; 2540 } 2541 } 2542 2543 bool Surface::isNonNormalizedInteger(Format format) 2544 { 2545 switch(format) 2546 { 2547 case FORMAT_A8B8G8R8I: 2548 case FORMAT_X8B8G8R8I: 2549 case FORMAT_G8R8I: 2550 case FORMAT_R8I: 2551 case FORMAT_A8B8G8R8UI: 2552 case 
FORMAT_X8B8G8R8UI:
	case FORMAT_G8R8UI:
	case FORMAT_R8UI:
	case FORMAT_A16B16G16R16I:
	case FORMAT_X16B16G16R16I:
	case FORMAT_G16R16I:
	case FORMAT_R16I:
	case FORMAT_A16B16G16R16UI:
	case FORMAT_X16B16G16R16UI:
	case FORMAT_G16R16UI:
	case FORMAT_R16UI:
	case FORMAT_A32B32G32R32I:
	case FORMAT_X32B32G32R32I:
	case FORMAT_G32R32I:
	case FORMAT_R32I:
	case FORMAT_A32B32G32R32UI:
	case FORMAT_X32B32G32R32UI:
	case FORMAT_G32R32UI:
	case FORMAT_R32UI:
		return true;
	default:
		return false;
	}
}

// Returns the number of components the format exposes.
int Surface::componentCount(Format format)
{
	switch(format)
	{
	case FORMAT_R5G6B5:         return 3;
	case FORMAT_X8R8G8B8:       return 3;
	case FORMAT_X8B8G8R8:       return 3;
	case FORMAT_A8R8G8B8:       return 4;
	case FORMAT_A8B8G8R8:       return 4;
	case FORMAT_G8R8:           return 2;
	case FORMAT_G16R16:         return 2;
	case FORMAT_A16B16G16R16:   return 4;
	case FORMAT_V8U8:           return 2;
	case FORMAT_Q8W8V8U8:       return 4;
	case FORMAT_X8L8V8U8:       return 3;
	case FORMAT_V16U16:         return 2;
	case FORMAT_A16W16V16U16:   return 4;
	case FORMAT_Q16W16V16U16:   return 4;
	case FORMAT_R32F:           return 1;
	case FORMAT_G32R32F:        return 2;
	case FORMAT_A32B32G32R32F:  return 4;
	case FORMAT_D32F_LOCKABLE:  return 1;
	case FORMAT_D32FS8_TEXTURE: return 1;
	case FORMAT_D32FS8_SHADOW:  return 1;
	case FORMAT_A8:             return 1;
	case FORMAT_R8:             return 1;
	case FORMAT_L8:             return 1;
	case FORMAT_L16:            return 1;
	case FORMAT_A8L8:           return 2;
	case FORMAT_YV12_BT601:     return 3;
	case FORMAT_YV12_BT709:     return 3;
	case FORMAT_YV12_JFIF:      return 3;
	default:
		ASSERT(false);
	}

	return 1;
}

// Allocates a zero-initialized buffer large enough for the given dimensions,
// with dimensions padded to even values and a small overrun margin.
void *Surface::allocateBuffer(int width, int height, int depth, Format format)
{
	// Render targets require 2x2 quads
	int width2 = (width + 1) & ~1;
	int height2 = (height + 1) & ~1;

	// FIXME: Unpacking byte4 to short4 in the sampler currently involves reading 8 bytes,
	// so we have to allocate 4 extra bytes to avoid buffer overruns.
	return allocateZero(size(width2, height2, depth, format) + 4);
}

// Fills 'bytes' bytes at 'buffer' with the 32-bit 'pattern', using SSE
// non-temporal streaming stores (_mm_stream_ps) for large aligned runs.
// NOTE(review): the head alignment loops always store the low byte/word of
// 'pattern' first — this assumes the pattern is invariant under byte rotation
// or that callers pass suitably aligned buffers; confirm at call sites.
void Surface::memfill4(void *buffer, int pattern, int bytes)
{
	// Byte stores until 2-byte aligned.
	while((size_t)buffer & 0x1 && bytes >= 1)
	{
		*(char*)buffer = (char)pattern;
		(char*&)buffer += 1;
		bytes -= 1;
	}

	// Word stores until 4-byte aligned.
	while((size_t)buffer & 0x3 && bytes >= 2)
	{
		*(short*)buffer = (short)pattern;
		(short*&)buffer += 1;
		bytes -= 2;
	}

	if(CPUID::supportsSSE())
	{
		// Dword stores until 16-byte aligned for the streaming stores below.
		while((size_t)buffer & 0xF && bytes >= 4)
		{
			*(int*)buffer = pattern;
			(int*&)buffer += 1;
			bytes -= 4;
		}

		// Reinterpret the 32-bit pattern as a float and broadcast it to all
		// four lanes; the bit pattern is what matters, not the float value.
		__m128 quad = _mm_set_ps1((float&)pattern);

		float *pointer = (float*)buffer;
		int qxwords = bytes / 64;
		bytes -= qxwords * 64;

		// 64 bytes per iteration, bypassing the cache.
		while(qxwords--)
		{
			_mm_stream_ps(pointer + 0, quad);
			_mm_stream_ps(pointer + 4, quad);
			_mm_stream_ps(pointer + 8, quad);
			_mm_stream_ps(pointer + 12, quad);

			pointer += 16;
		}

		buffer = pointer;
	}

	// Scalar tail: remaining dwords, then a word, then a byte.
	while(bytes >= 4)
	{
		*(int*)buffer = (int)pattern;
		(int*&)buffer += 1;
		bytes -= 4;
	}

	while(bytes >= 2)
	{
		*(short*)buffer = (short)pattern;
		(short*&)buffer += 1;
		bytes -= 2;
	}

	while(bytes >= 1)
	{
		*(char*)buffer = (char)pattern;
		(char*&)buffer += 1;
		bytes -= 1;
	}
}

// Clears the rectangle (x0, y0)-(x0+width, y0+height) of every slice of the
// internal buffer to the given color, honoring the RGBA write mask.
void Surface::clearColorBuffer(float red, float green, float blue, float alpha, unsigned int rgbaMask, int x0, int y0, int width, int height)
{
	// FIXME: Also clear buffers in other formats?
2696 2697 // Not overlapping 2698 if(x0 > internal.width) return; 2699 if(y0 > internal.height) return; 2700 if(x0 + width < 0) return; 2701 if(y0 + height < 0) return; 2702 2703 // Clip against dimensions 2704 if(x0 < 0) {width += x0; x0 = 0;} 2705 if(x0 + width > internal.width) width = internal.width - x0; 2706 if(y0 < 0) {height += y0; y0 = 0;} 2707 if(y0 + height > internal.height) height = internal.height - y0; 2708 2709 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 2710 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY; 2711 2712 int x1 = x0 + width; 2713 int y1 = y0 + height; 2714 2715 // if(lockable || !quadLayoutEnabled) 2716 { 2717 unsigned char *buffer = (unsigned char*)lockInternal(x0, y0, 0, lock, PUBLIC); 2718 2719 for(int z = 0; z < internal.depth; z++) 2720 { 2721 unsigned char *target = buffer; 2722 2723 for(int y = y0; y < y1; y++) 2724 { 2725 switch(internal.format) 2726 { 2727 case FORMAT_NULL: 2728 break; 2729 case FORMAT_X8R8G8B8: 2730 case FORMAT_A8R8G8B8: 2731 // case FORMAT_X8G8R8B8Q: // FIXME 2732 // case FORMAT_A8G8R8B8Q: // FIXME 2733 { 2734 unsigned char r8 = iround(red * 0xFF); 2735 unsigned char g8 = iround(green * 0xFF); 2736 unsigned char b8 = iround(blue * 0xFF); 2737 unsigned char a8 = iround(alpha * 0xFF); 2738 unsigned char a8r8g8b8[4] = {b8, g8, r8, a8}; 2739 unsigned int colorARGB = (unsigned int&)a8r8g8b8; 2740 2741 if(rgbaMask == 0xF || (internal.format == FORMAT_X8R8G8B8 && rgbaMask == 0x7)) 2742 { 2743 memfill4(target, colorARGB, 4 * (x1 - x0)); 2744 } 2745 else 2746 { 2747 unsigned int bgraMask = (rgbaMask & 0x1 ? 0x00FF0000 : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x000000FF : 0) | (rgbaMask & 0x8 ? 
0xFF000000 : 0); 2748 unsigned int invMask = ~bgraMask; 2749 unsigned int maskedColor = colorARGB & bgraMask; 2750 unsigned int *target32 = (unsigned int*)target; 2751 2752 for(int x = 0; x < width; x++) 2753 { 2754 target32[x] = maskedColor | (target32[x] & invMask); 2755 } 2756 } 2757 } 2758 break; 2759 case FORMAT_X8B8G8R8: 2760 case FORMAT_A8B8G8R8: 2761 { 2762 unsigned char r8 = iround(red * 0xFF); 2763 unsigned char g8 = iround(green * 0xFF); 2764 unsigned char b8 = iround(blue * 0xFF); 2765 unsigned char a8 = iround(alpha * 0xFF); 2766 unsigned char a8b8g8r8[4] = {r8, g8, b8, a8}; 2767 unsigned int colorABGR = (unsigned int&)a8b8g8r8; 2768 2769 if(rgbaMask == 0xF || (internal.format == FORMAT_X8B8G8R8 && rgbaMask == 0x7)) 2770 { 2771 memfill4(target, colorABGR, 4 * (x1 - x0)); 2772 } 2773 else 2774 { 2775 unsigned int rgbaMask32 = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 0x0000FF00 : 0) | (rgbaMask & 0x4 ? 0x00FF0000 : 0) | (rgbaMask & 0x8 ? 0xFF000000 : 0); 2776 unsigned int invMask = ~rgbaMask32; 2777 unsigned int maskedColor = colorABGR & rgbaMask32; 2778 unsigned int *target32 = (unsigned int*)target; 2779 2780 for(int x = 0; x < width; x++) 2781 { 2782 target32[x] = maskedColor | (target32[x] & invMask); 2783 } 2784 } 2785 } 2786 break; 2787 case FORMAT_G8R8: 2788 { 2789 unsigned char r8 = iround(red * 0xFF); 2790 unsigned char g8 = iround(green * 0xFF); 2791 unsigned char g8r8[4] = {r8, g8, r8, g8}; 2792 2793 if((rgbaMask & 0x3) == 0x3) 2794 { 2795 memfill4(target, (int&)g8r8, 2 * (x1 - x0)); 2796 } 2797 else 2798 { 2799 unsigned short rgMask = (rgbaMask & 0x1 ? 0x000000FF : 0) | (rgbaMask & 0x2 ? 
0x0000FF00 : 0); 2800 unsigned short invMask = ~rgMask; 2801 unsigned short maskedColor = (unsigned short&)g8r8 & rgMask; 2802 unsigned short *target16 = (unsigned short*)target; 2803 2804 for(int x = 0; x < width; x++) 2805 { 2806 target16[x] = maskedColor | (target16[x] & invMask); 2807 } 2808 } 2809 } 2810 break; 2811 case FORMAT_G16R16: 2812 { 2813 unsigned char r16 = iround(red * 0xFFFF); 2814 unsigned char g16 = iround(green * 0xFFFF); 2815 unsigned short g16r16[2] = {r16, g16}; 2816 2817 if((rgbaMask & 0x3) == 0x3) 2818 { 2819 memfill4(target, (int&)g16r16, 4 * (x1 - x0)); 2820 } 2821 else 2822 { 2823 unsigned int rgMask = (rgbaMask & 0x1 ? 0x0000FFFF : 0) | (rgbaMask & 0x2 ? 0xFFFF0000 : 0); 2824 unsigned int invMask = ~rgMask; 2825 unsigned int maskedColor = (unsigned int&)g16r16 & rgMask; 2826 unsigned int *target32 = (unsigned int*)target; 2827 2828 for(int x = 0; x < width; x++) 2829 { 2830 target32[x] = maskedColor | (target32[x] & invMask); 2831 } 2832 } 2833 } 2834 break; 2835 case FORMAT_A16B16G16R16: 2836 { 2837 unsigned char r16 = iround(red * 0xFFFF); 2838 unsigned char g16 = iround(green * 0xFFFF); 2839 unsigned char b16 = iround(blue * 0xFFFF); 2840 unsigned char a16 = iround(alpha * 0xFFFF); 2841 2842 if(rgbaMask == 0xF) 2843 { 2844 for(int x = 0; x < width; x++) 2845 { 2846 ((unsigned short*)target)[4 * x + 0] = r16; 2847 ((unsigned short*)target)[4 * x + 1] = g16; 2848 ((unsigned short*)target)[4 * x + 2] = b16; 2849 ((unsigned short*)target)[4 * x + 3] = a16; 2850 } 2851 } 2852 else 2853 { 2854 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 0] = r16; 2855 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 1] = g16; 2856 if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 2] = b16; 2857 if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((unsigned short*)target)[4 * x + 3] = a16; 2858 } 2859 } 2860 break; 2861 case FORMAT_R32F: 2862 
if(rgbaMask & 0x1) 2863 { 2864 for(int x = 0; x < width; x++) 2865 { 2866 ((float*)target)[x] = red; 2867 } 2868 } 2869 break; 2870 case FORMAT_G32R32F: 2871 if((rgbaMask & 0x3) == 0x3) 2872 { 2873 for(int x = 0; x < width; x++) 2874 { 2875 ((float*)target)[2 * x + 0] = red; 2876 ((float*)target)[2 * x + 1] = green; 2877 } 2878 } 2879 else 2880 { 2881 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 0] = red; 2882 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[2 * x + 1] = green; 2883 } 2884 break; 2885 case FORMAT_A32B32G32R32F: 2886 if(rgbaMask == 0xF) 2887 { 2888 for(int x = 0; x < width; x++) 2889 { 2890 ((float*)target)[4 * x + 0] = red; 2891 ((float*)target)[4 * x + 1] = green; 2892 ((float*)target)[4 * x + 2] = blue; 2893 ((float*)target)[4 * x + 3] = alpha; 2894 } 2895 } 2896 else 2897 { 2898 if(rgbaMask & 0x1) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 0] = red; 2899 if(rgbaMask & 0x2) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 1] = green; 2900 if(rgbaMask & 0x4) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 2] = blue; 2901 if(rgbaMask & 0x8) for(int x = 0; x < width; x++) ((float*)target)[4 * x + 3] = alpha; 2902 } 2903 break; 2904 case FORMAT_R5G6B5: 2905 { 2906 unsigned int r5 = iround(red * 0x1F); 2907 unsigned int g6 = iround(green * 0x3F); 2908 unsigned int b5 = iround(blue * 0x1F); 2909 unsigned int r5g6b5 = (r5 << 11) | (g6 << 5) | b5; 2910 2911 if((rgbaMask & 0x7) == 0x7) 2912 { 2913 unsigned int r5g6b5r5g6b5 = r5g6b5 | (r5g6b5 << 16); 2914 memfill4(target, r5g6b5r5g6b5, 2 * (x1 - x0)); 2915 } 2916 else 2917 { 2918 unsigned short rgbMask = (rgbaMask & 0x1 ? 0xF800 : 0) | (rgbaMask & 0x2 ? 0x07E0 : 0) | (rgbaMask & 0x3 ? 
0x001F : 0); 2919 unsigned short invMask = ~rgbMask; 2920 unsigned short maskedColor = r5g6b5 & rgbMask; 2921 unsigned short *target16 = (unsigned short*)target; 2922 2923 for(int x = 0; x < width; x++) 2924 { 2925 target16[x] = maskedColor | (target16[x] & invMask); 2926 } 2927 } 2928 } 2929 break; 2930 default: 2931 ASSERT(false); 2932 } 2933 2934 target += internal.pitchB; 2935 } 2936 2937 buffer += internal.sliceB; 2938 } 2939 2940 unlockInternal(); 2941 } 2942 /* else 2943 { 2944 int width2 = (internal.width + 1) & ~1; 2945 2946 // unsigned char *target = (unsigned char*&)buffer; 2947 // 2948 // for(int y = y0; y < y1; y++) 2949 // { 2950 // for(int x = x0; x < x1; x++) 2951 // { 2952 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 0] = (color & 0x000000FF) >> 0; 2953 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 4] = (color & 0x00FF0000) >> 16; 2954 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 8] = (color & 0x0000FF00) >> 8; 2955 // target[width2 * 4 * (y & ~1) + 2 * (y & 1) + 8 * (x & ~1) + (x & 1) + 12] = (color & 0xFF000000) >> 24; 2956 // } 2957 // } 2958 2959 unsigned char colorQ[16]; 2960 2961 colorQ[0] = (color & 0x000000FF) >> 0; 2962 colorQ[1] = (color & 0x000000FF) >> 0; 2963 colorQ[2] = (color & 0x000000FF) >> 0; 2964 colorQ[3] = (color & 0x000000FF) >> 0; 2965 colorQ[4] = (color & 0x00FF0000) >> 16; 2966 colorQ[5] = (color & 0x00FF0000) >> 16; 2967 colorQ[6] = (color & 0x00FF0000) >> 16; 2968 colorQ[7] = (color & 0x00FF0000) >> 16; 2969 colorQ[8] = (color & 0x0000FF00) >> 8; 2970 colorQ[9] = (color & 0x0000FF00) >> 8; 2971 colorQ[10] = (color & 0x0000FF00) >> 8; 2972 colorQ[11] = (color & 0x0000FF00) >> 8; 2973 colorQ[12] = (color & 0xFF000000) >> 24; 2974 colorQ[13] = (color & 0xFF000000) >> 24; 2975 colorQ[14] = (color & 0xFF000000) >> 24; 2976 colorQ[15] = (color & 0xFF000000) >> 24; 2977 2978 for(int y = y0; y < y1; y++) 2979 { 2980 unsigned char *target = 
(unsigned char*)lockInternal(0, 0, 0, lock) + width2 * 4 * (y & ~1) + 2 * (y & 1); // FIXME: Unlock 2981 2982 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 2983 { 2984 if((x0 & 1) != 0) 2985 { 2986 target[8 * (x0 & ~1) + 1 + 0] = (color & 0x000000FF) >> 0; 2987 target[8 * (x0 & ~1) + 1 + 4] = (color & 0x00FF0000) >> 16; 2988 target[8 * (x0 & ~1) + 1 + 8] = (color & 0x0000FF00) >> 8; 2989 target[8 * (x0 & ~1) + 1 + 12] = (color & 0xFF000000) >> 24; 2990 2991 target[8 * (x0 & ~1) + 3 + 0] = (color & 0x000000FF) >> 0; 2992 target[8 * (x0 & ~1) + 3 + 4] = (color & 0x00FF0000) >> 16; 2993 target[8 * (x0 & ~1) + 3 + 8] = (color & 0x0000FF00) >> 8; 2994 target[8 * (x0 & ~1) + 3 + 12] = (color & 0xFF000000) >> 24; 2995 } 2996 2997 __asm 2998 { 2999 movq mm0, colorQ+0 3000 movq mm1, colorQ+8 3001 3002 mov eax, x0 3003 add eax, 1 3004 and eax, 0xFFFFFFFE 3005 cmp eax, x1 3006 jge qEnd 3007 3008 mov edi, target 3009 3010 qLoop: 3011 movntq [edi+8*eax+0], mm0 3012 movntq [edi+8*eax+8], mm1 3013 3014 add eax, 2 3015 cmp eax, x1 3016 jl qLoop 3017 qEnd: 3018 emms 3019 } 3020 3021 if((x1 & 1) != 0) 3022 { 3023 target[8 * (x1 & ~1) + 0 + 0] = (color & 0x000000FF) >> 0; 3024 target[8 * (x1 & ~1) + 0 + 4] = (color & 0x00FF0000) >> 16; 3025 target[8 * (x1 & ~1) + 0 + 8] = (color & 0x0000FF00) >> 8; 3026 target[8 * (x1 & ~1) + 0 + 12] = (color & 0xFF000000) >> 24; 3027 3028 target[8 * (x1 & ~1) + 2 + 0] = (color & 0x000000FF) >> 0; 3029 target[8 * (x1 & ~1) + 2 + 4] = (color & 0x00FF0000) >> 16; 3030 target[8 * (x1 & ~1) + 2 + 8] = (color & 0x0000FF00) >> 8; 3031 target[8 * (x1 & ~1) + 2 + 12] = (color & 0xFF000000) >> 24; 3032 } 3033 3034 y++; 3035 } 3036 else 3037 { 3038 for(int x = x0; x < x1; x++) 3039 { 3040 target[8 * (x & ~1) + (x & 1) + 0] = (color & 0x000000FF) >> 0; 3041 target[8 * (x & ~1) + (x & 1) + 4] = (color & 0x00FF0000) >> 16; 3042 target[8 * (x & ~1) + (x & 1) + 8] = (color & 0x0000FF00) >> 8; 3043 target[8 * (x & ~1) + (x & 1) + 12] = (color & 
0xFF000000) >> 24; 3044 } 3045 } 3046 } 3047 }*/ 3048 } 3049 3050 void Surface::clearDepthBuffer(float depth, int x0, int y0, int width, int height) 3051 { 3052 // Not overlapping 3053 if(x0 > internal.width) return; 3054 if(y0 > internal.height) return; 3055 if(x0 + width < 0) return; 3056 if(y0 + height < 0) return; 3057 3058 // Clip against dimensions 3059 if(x0 < 0) {width += x0; x0 = 0;} 3060 if(x0 + width > internal.width) width = internal.width - x0; 3061 if(y0 < 0) {height += y0; y0 = 0;} 3062 if(y0 + height > internal.height) height = internal.height - y0; 3063 3064 const bool entire = x0 == 0 && y0 == 0 && width == internal.width && height == internal.height; 3065 const Lock lock = entire ? LOCK_DISCARD : LOCK_WRITEONLY; 3066 3067 int width2 = (internal.width + 1) & ~1; 3068 3069 int x1 = x0 + width; 3070 int y1 = y0 + height; 3071 3072 if(internal.format == FORMAT_D32F_LOCKABLE || 3073 internal.format == FORMAT_D32FS8_TEXTURE || 3074 internal.format == FORMAT_D32FS8_SHADOW) 3075 { 3076 float *target = (float*)lockInternal(0, 0, 0, lock, PUBLIC) + x0 + width2 * y0; 3077 3078 for(int z = 0; z < internal.depth; z++) 3079 { 3080 for(int y = y0; y < y1; y++) 3081 { 3082 memfill4(target, (int&)depth, 4 * width); 3083 target += width2; 3084 } 3085 } 3086 3087 unlockInternal(); 3088 } 3089 else // Quad layout 3090 { 3091 if(complementaryDepthBuffer) 3092 { 3093 depth = 1 - depth; 3094 } 3095 3096 float *buffer = (float*)lockInternal(0, 0, 0, lock, PUBLIC); 3097 3098 for(int z = 0; z < internal.depth; z++) 3099 { 3100 for(int y = y0; y < y1; y++) 3101 { 3102 float *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3103 3104 if((y & 1) == 0 && y + 1 < y1) // Fill quad line at once 3105 { 3106 if((x0 & 1) != 0) 3107 { 3108 target[(x0 & ~1) * 2 + 1] = depth; 3109 target[(x0 & ~1) * 2 + 3] = depth; 3110 } 3111 3112 // for(int x2 = ((x0 + 1) & ~1) * 2; x2 < x1 * 2; x2 += 4) 3113 // { 3114 // target[x2 + 0] = depth; 3115 // target[x2 + 1] = depth; 3116 // target[x2 + 
2] = depth; 3117 // target[x2 + 3] = depth; 3118 // } 3119 3120 // __asm 3121 // { 3122 // movss xmm0, depth 3123 // shufps xmm0, xmm0, 0x00 3124 // 3125 // mov eax, x0 3126 // add eax, 1 3127 // and eax, 0xFFFFFFFE 3128 // cmp eax, x1 3129 // jge qEnd 3130 // 3131 // mov edi, target 3132 // 3133 // qLoop: 3134 // movntps [edi+8*eax], xmm0 3135 // 3136 // add eax, 2 3137 // cmp eax, x1 3138 // jl qLoop 3139 // qEnd: 3140 // } 3141 3142 memfill4(&target[((x0 + 1) & ~1) * 2], (int&)depth, 8 * ((x1 & ~1) - ((x0 + 1) & ~1))); 3143 3144 if((x1 & 1) != 0) 3145 { 3146 target[(x1 & ~1) * 2 + 0] = depth; 3147 target[(x1 & ~1) * 2 + 2] = depth; 3148 } 3149 3150 y++; 3151 } 3152 else 3153 { 3154 for(int x = x0; x < x1; x++) 3155 { 3156 target[(x & ~1) * 2 + (x & 1)] = depth; 3157 } 3158 } 3159 } 3160 3161 buffer += internal.sliceP; 3162 } 3163 3164 unlockInternal(); 3165 } 3166 } 3167 3168 void Surface::clearStencilBuffer(unsigned char s, unsigned char mask, int x0, int y0, int width, int height) 3169 { 3170 // Not overlapping 3171 if(x0 > internal.width) return; 3172 if(y0 > internal.height) return; 3173 if(x0 + width < 0) return; 3174 if(y0 + height < 0) return; 3175 3176 // Clip against dimensions 3177 if(x0 < 0) {width += x0; x0 = 0;} 3178 if(x0 + width > internal.width) width = internal.width - x0; 3179 if(y0 < 0) {height += y0; y0 = 0;} 3180 if(y0 + height > internal.height) height = internal.height - y0; 3181 3182 int width2 = (internal.width + 1) & ~1; 3183 3184 int x1 = x0 + width; 3185 int y1 = y0 + height; 3186 3187 unsigned char maskedS = s & mask; 3188 unsigned char invMask = ~mask; 3189 unsigned int fill = maskedS; 3190 fill = fill | (fill << 8) | (fill << 16) + (fill << 24); 3191 3192 if(false) 3193 { 3194 char *target = (char*)lockStencil(0, PUBLIC) + x0 + width2 * y0; 3195 3196 for(int z = 0; z < stencil.depth; z++) 3197 { 3198 for(int y = y0; y < y0 + height; y++) 3199 { 3200 if(mask == 0xFF) 3201 { 3202 memfill4(target, fill, width); 3203 } 3204 else 3205 { 
3206 for(int x = 0; x < width; x++) 3207 { 3208 target[x] = maskedS | (target[x] & invMask); 3209 } 3210 } 3211 3212 target += width2; 3213 } 3214 } 3215 3216 unlockStencil(); 3217 } 3218 else // Quad layout 3219 { 3220 char *buffer = (char*)lockStencil(0, PUBLIC); 3221 3222 if(mask == 0xFF) 3223 { 3224 for(int z = 0; z < stencil.depth; z++) 3225 { 3226 for(int y = y0; y < y1; y++) 3227 { 3228 char *target = buffer + (y & ~1) * width2 + (y & 1) * 2; 3229 3230 if((y & 1) == 0 && y + 1 < y1 && mask == 0xFF) // Fill quad line at once 3231 { 3232 if((x0 & 1) != 0) 3233 { 3234 target[(x0 & ~1) * 2 + 1] = fill; 3235 target[(x0 & ~1) * 2 + 3] = fill; 3236 } 3237 3238 memfill4(&target[((x0 + 1) & ~1) * 2], fill, ((x1 + 1) & ~1) * 2 - ((x0 + 1) & ~1) * 2); 3239 3240 if((x1 & 1) != 0) 3241 { 3242 target[(x1 & ~1) * 2 + 0] = fill; 3243 target[(x1 & ~1) * 2 + 2] = fill; 3244 } 3245 3246 y++; 3247 } 3248 else 3249 { 3250 for(int x = x0; x < x1; x++) 3251 { 3252 target[(x & ~1) * 2 + (x & 1)] = maskedS | (target[x] & invMask); 3253 } 3254 } 3255 } 3256 3257 buffer += stencil.sliceP; 3258 } 3259 } 3260 3261 unlockStencil(); 3262 } 3263 } 3264 3265 void Surface::fill(const Color<float> &color, int x0, int y0, int width, int height) 3266 { 3267 unsigned char *row; 3268 Buffer *buffer; 3269 3270 if(internal.dirty) 3271 { 3272 row = (unsigned char*)lockInternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3273 buffer = &internal; 3274 } 3275 else 3276 { 3277 row = (unsigned char*)lockExternal(x0, y0, 0, LOCK_WRITEONLY, PUBLIC); 3278 buffer = &external; 3279 } 3280 3281 if(buffer->bytes <= 4) 3282 { 3283 int c; 3284 buffer->write(&c, color); 3285 3286 if(buffer->bytes <= 1) c = (c << 8) | c; 3287 if(buffer->bytes <= 2) c = (c << 16) | c; 3288 3289 for(int y = 0; y < height; y++) 3290 { 3291 memfill4(row, c, width * buffer->bytes); 3292 3293 row += buffer->pitchB; 3294 } 3295 } 3296 else // Generic 3297 { 3298 for(int y = 0; y < height; y++) 3299 { 3300 unsigned char *element = row; 3301 3302 
for(int x = 0; x < width; x++) 3303 { 3304 buffer->write(element, color); 3305 3306 element += buffer->bytes; 3307 } 3308 3309 row += buffer->pitchB; 3310 } 3311 } 3312 3313 if(buffer == &internal) 3314 { 3315 unlockInternal(); 3316 } 3317 else 3318 { 3319 unlockExternal(); 3320 } 3321 } 3322 3323 Color<float> Surface::readExternal(int x, int y, int z) const 3324 { 3325 ASSERT(external.lock != LOCK_UNLOCKED); 3326 3327 return external.read(x, y, z); 3328 } 3329 3330 Color<float> Surface::readExternal(int x, int y) const 3331 { 3332 ASSERT(external.lock != LOCK_UNLOCKED); 3333 3334 return external.read(x, y); 3335 } 3336 3337 Color<float> Surface::sampleExternal(float x, float y, float z) const 3338 { 3339 ASSERT(external.lock != LOCK_UNLOCKED); 3340 3341 return external.sample(x, y, z); 3342 } 3343 3344 Color<float> Surface::sampleExternal(float x, float y) const 3345 { 3346 ASSERT(external.lock != LOCK_UNLOCKED); 3347 3348 return external.sample(x, y); 3349 } 3350 3351 void Surface::writeExternal(int x, int y, int z, const Color<float> &color) 3352 { 3353 ASSERT(external.lock != LOCK_UNLOCKED); 3354 3355 external.write(x, y, z, color); 3356 } 3357 3358 void Surface::writeExternal(int x, int y, const Color<float> &color) 3359 { 3360 ASSERT(external.lock != LOCK_UNLOCKED); 3361 3362 external.write(x, y, color); 3363 } 3364 3365 void Surface::copyInternal(const Surface* source, int x, int y, float srcX, float srcY, bool filter) 3366 { 3367 ASSERT(internal.lock != LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3368 3369 sw::Color<float> color; 3370 3371 if(!filter) 3372 { 3373 color = source->internal.read((int)srcX, (int)srcY); 3374 } 3375 else // Bilinear filtering 3376 { 3377 color = source->internal.sample(srcX, srcY); 3378 } 3379 3380 internal.write(x, y, color); 3381 } 3382 3383 void Surface::copyInternal(const Surface* source, int x, int y, int z, float srcX, float srcY, float srcZ, bool filter) 3384 { 3385 ASSERT(internal.lock != 
LOCK_UNLOCKED && source && source->internal.lock != LOCK_UNLOCKED); 3386 3387 sw::Color<float> color; 3388 3389 if(!filter) 3390 { 3391 color = source->internal.read((int)srcX, (int)srcY, int(srcZ)); 3392 } 3393 else // Bilinear filtering 3394 { 3395 color = source->internal.sample(srcX, srcY, srcZ); 3396 } 3397 3398 internal.write(x, y, z, color); 3399 } 3400 3401 bool Surface::hasStencil() const 3402 { 3403 return isStencil(external.format); 3404 } 3405 3406 bool Surface::hasDepth() const 3407 { 3408 return isDepth(external.format); 3409 } 3410 3411 bool Surface::hasPalette() const 3412 { 3413 return isPalette(external.format); 3414 } 3415 3416 bool Surface::isRenderTarget() const 3417 { 3418 return renderTarget; 3419 } 3420 3421 bool Surface::hasDirtyMipmaps() const 3422 { 3423 return dirtyMipmaps; 3424 } 3425 3426 void Surface::cleanMipmaps() 3427 { 3428 dirtyMipmaps = false; 3429 } 3430 3431 Resource *Surface::getResource() 3432 { 3433 return resource; 3434 } 3435 3436 bool Surface::identicalFormats() const 3437 { 3438 return external.format == internal.format && 3439 external.width == internal.width && 3440 external.height == internal.height && 3441 external.depth == internal.depth && 3442 external.pitchB == internal.pitchB && 3443 external.sliceB == internal.sliceB; 3444 } 3445 3446 Format Surface::selectInternalFormat(Format format) const 3447 { 3448 switch(format) 3449 { 3450 case FORMAT_NULL: 3451 return FORMAT_NULL; 3452 case FORMAT_P8: 3453 case FORMAT_A8P8: 3454 case FORMAT_A4R4G4B4: 3455 case FORMAT_A1R5G5B5: 3456 case FORMAT_A8R3G3B2: 3457 return FORMAT_A8R8G8B8; 3458 case FORMAT_A8: 3459 return FORMAT_A8; 3460 case FORMAT_R8: 3461 return FORMAT_R8; 3462 case FORMAT_A2R10G10B10: 3463 case FORMAT_A2B10G10R10: 3464 case FORMAT_A16B16G16R16: 3465 return FORMAT_A16B16G16R16; 3466 case FORMAT_G8R8: 3467 return FORMAT_G8R8; 3468 case FORMAT_G16R16: 3469 return FORMAT_G16R16; 3470 case FORMAT_A8R8G8B8: 3471 if(lockable || !quadLayoutEnabled) 3472 { 3473 
return FORMAT_A8R8G8B8; 3474 } 3475 else 3476 { 3477 return FORMAT_A8G8R8B8Q; 3478 } 3479 case FORMAT_R5G5B5A1: 3480 case FORMAT_R4G4B4A4: 3481 case FORMAT_A8B8G8R8: 3482 return FORMAT_A8B8G8R8; 3483 case FORMAT_R5G6B5: 3484 return FORMAT_R5G6B5; 3485 case FORMAT_R3G3B2: 3486 case FORMAT_R8G8B8: 3487 case FORMAT_X4R4G4B4: 3488 case FORMAT_X1R5G5B5: 3489 case FORMAT_X8R8G8B8: 3490 if(lockable || !quadLayoutEnabled) 3491 { 3492 return FORMAT_X8R8G8B8; 3493 } 3494 else 3495 { 3496 return FORMAT_X8G8R8B8Q; 3497 } 3498 case FORMAT_B8G8R8: 3499 case FORMAT_X8B8G8R8: 3500 return FORMAT_X8B8G8R8; 3501 // Compressed formats 3502 #if S3TC_SUPPORT 3503 case FORMAT_DXT1: 3504 case FORMAT_DXT3: 3505 case FORMAT_DXT5: 3506 #endif 3507 case FORMAT_RGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3508 case FORMAT_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2: 3509 case FORMAT_RGBA8_ETC2_EAC: 3510 case FORMAT_SRGB8_ALPHA8_ETC2_EAC: 3511 case FORMAT_SRGB8_ALPHA8_ASTC_4x4_KHR: 3512 case FORMAT_SRGB8_ALPHA8_ASTC_5x4_KHR: 3513 case FORMAT_SRGB8_ALPHA8_ASTC_5x5_KHR: 3514 case FORMAT_SRGB8_ALPHA8_ASTC_6x5_KHR: 3515 case FORMAT_SRGB8_ALPHA8_ASTC_6x6_KHR: 3516 case FORMAT_SRGB8_ALPHA8_ASTC_8x5_KHR: 3517 case FORMAT_SRGB8_ALPHA8_ASTC_8x6_KHR: 3518 case FORMAT_SRGB8_ALPHA8_ASTC_8x8_KHR: 3519 case FORMAT_SRGB8_ALPHA8_ASTC_10x5_KHR: 3520 case FORMAT_SRGB8_ALPHA8_ASTC_10x6_KHR: 3521 case FORMAT_SRGB8_ALPHA8_ASTC_10x8_KHR: 3522 case FORMAT_SRGB8_ALPHA8_ASTC_10x10_KHR: 3523 case FORMAT_SRGB8_ALPHA8_ASTC_12x10_KHR: 3524 case FORMAT_SRGB8_ALPHA8_ASTC_12x12_KHR: 3525 return FORMAT_A8R8G8B8; 3526 case FORMAT_RGBA_ASTC_4x4_KHR: 3527 case FORMAT_RGBA_ASTC_5x4_KHR: 3528 case FORMAT_RGBA_ASTC_5x5_KHR: 3529 case FORMAT_RGBA_ASTC_6x5_KHR: 3530 case FORMAT_RGBA_ASTC_6x6_KHR: 3531 case FORMAT_RGBA_ASTC_8x5_KHR: 3532 case FORMAT_RGBA_ASTC_8x6_KHR: 3533 case FORMAT_RGBA_ASTC_8x8_KHR: 3534 case FORMAT_RGBA_ASTC_10x5_KHR: 3535 case FORMAT_RGBA_ASTC_10x6_KHR: 3536 case FORMAT_RGBA_ASTC_10x8_KHR: 3537 case FORMAT_RGBA_ASTC_10x10_KHR: 3538 case 
FORMAT_RGBA_ASTC_12x10_KHR: 3539 case FORMAT_RGBA_ASTC_12x12_KHR: 3540 // ASTC supports HDR, so a floating point format is required to represent it properly 3541 return FORMAT_A32B32G32R32F; // FIXME: 16FP is probably sufficient, but it's currently unsupported 3542 case FORMAT_ATI1: 3543 case FORMAT_R11_EAC: 3544 return FORMAT_R8; 3545 case FORMAT_SIGNED_R11_EAC: 3546 return FORMAT_R32F; // FIXME: Signed 8bit format would be sufficient 3547 case FORMAT_ATI2: 3548 case FORMAT_RG11_EAC: 3549 return FORMAT_G8R8; 3550 case FORMAT_SIGNED_RG11_EAC: 3551 return FORMAT_G32R32F; // FIXME: Signed 8bit format would be sufficient 3552 case FORMAT_ETC1: 3553 case FORMAT_RGB8_ETC2: 3554 case FORMAT_SRGB8_ETC2: 3555 return FORMAT_X8R8G8B8; 3556 // Bumpmap formats 3557 case FORMAT_V8U8: return FORMAT_V8U8; 3558 case FORMAT_L6V5U5: return FORMAT_X8L8V8U8; 3559 case FORMAT_Q8W8V8U8: return FORMAT_Q8W8V8U8; 3560 case FORMAT_X8L8V8U8: return FORMAT_X8L8V8U8; 3561 case FORMAT_V16U16: return FORMAT_V16U16; 3562 case FORMAT_A2W10V10U10: return FORMAT_A16W16V16U16; 3563 case FORMAT_Q16W16V16U16: return FORMAT_Q16W16V16U16; 3564 // Floating-point formats 3565 case FORMAT_A16F: return FORMAT_A32B32G32R32F; 3566 case FORMAT_R16F: return FORMAT_R32F; 3567 case FORMAT_G16R16F: return FORMAT_G32R32F; 3568 case FORMAT_B16G16R16F: return FORMAT_A32B32G32R32F; 3569 case FORMAT_A16B16G16R16F: return FORMAT_A32B32G32R32F; 3570 case FORMAT_A32F: return FORMAT_A32B32G32R32F; 3571 case FORMAT_R32F: return FORMAT_R32F; 3572 case FORMAT_G32R32F: return FORMAT_G32R32F; 3573 case FORMAT_B32G32R32F: return FORMAT_A32B32G32R32F; 3574 case FORMAT_A32B32G32R32F: return FORMAT_A32B32G32R32F; 3575 // Luminance formats 3576 case FORMAT_L8: return FORMAT_L8; 3577 case FORMAT_A4L4: return FORMAT_A8L8; 3578 case FORMAT_L16: return FORMAT_L16; 3579 case FORMAT_A8L8: return FORMAT_A8L8; 3580 case FORMAT_L16F: return FORMAT_A32B32G32R32F; 3581 case FORMAT_A16L16F: return FORMAT_A32B32G32R32F; 3582 case FORMAT_L32F: 
return FORMAT_A32B32G32R32F; 3583 case FORMAT_A32L32F: return FORMAT_A32B32G32R32F; 3584 // Depth/stencil formats 3585 case FORMAT_D16: 3586 case FORMAT_D32: 3587 case FORMAT_D24X8: 3588 case FORMAT_D24S8: 3589 case FORMAT_D24FS8: 3590 if(hasParent) // Texture 3591 { 3592 return FORMAT_D32FS8_SHADOW; 3593 } 3594 else if(complementaryDepthBuffer) 3595 { 3596 return FORMAT_D32F_COMPLEMENTARY; 3597 } 3598 else 3599 { 3600 return FORMAT_D32F; 3601 } 3602 case FORMAT_D32F_LOCKABLE: return FORMAT_D32F_LOCKABLE; 3603 case FORMAT_D32FS8_TEXTURE: return FORMAT_D32FS8_TEXTURE; 3604 case FORMAT_INTZ: return FORMAT_D32FS8_TEXTURE; 3605 case FORMAT_DF24S8: return FORMAT_D32FS8_SHADOW; 3606 case FORMAT_DF16S8: return FORMAT_D32FS8_SHADOW; 3607 case FORMAT_YV12_BT601: return FORMAT_YV12_BT601; 3608 case FORMAT_YV12_BT709: return FORMAT_YV12_BT709; 3609 case FORMAT_YV12_JFIF: return FORMAT_YV12_JFIF; 3610 default: 3611 ASSERT(false); 3612 } 3613 3614 return FORMAT_NULL; 3615 } 3616 3617 void Surface::setTexturePalette(unsigned int *palette) 3618 { 3619 Surface::palette = palette; 3620 Surface::paletteID++; 3621 } 3622 3623 void Surface::resolve() 3624 { 3625 if(internal.depth <= 1 || !internal.dirty || !renderTarget || internal.format == FORMAT_NULL) 3626 { 3627 return; 3628 } 3629 3630 void *source = internal.lockRect(0, 0, 0, LOCK_READWRITE); 3631 3632 int quality = internal.depth; 3633 int width = internal.width; 3634 int height = internal.height; 3635 int pitch = internal.pitchB; 3636 int slice = internal.sliceB; 3637 3638 unsigned char *source0 = (unsigned char*)source; 3639 unsigned char *source1 = source0 + slice; 3640 unsigned char *source2 = source1 + slice; 3641 unsigned char *source3 = source2 + slice; 3642 unsigned char *source4 = source3 + slice; 3643 unsigned char *source5 = source4 + slice; 3644 unsigned char *source6 = source5 + slice; 3645 unsigned char *source7 = source6 + slice; 3646 unsigned char *source8 = source7 + slice; 3647 unsigned char *source9 = source8 
+ slice; 3648 unsigned char *sourceA = source9 + slice; 3649 unsigned char *sourceB = sourceA + slice; 3650 unsigned char *sourceC = sourceB + slice; 3651 unsigned char *sourceD = sourceC + slice; 3652 unsigned char *sourceE = sourceD + slice; 3653 unsigned char *sourceF = sourceE + slice; 3654 3655 if(internal.format == FORMAT_X8R8G8B8 || internal.format == FORMAT_A8R8G8B8 || internal.format == FORMAT_X8B8G8R8 || internal.format == FORMAT_A8B8G8R8) 3656 { 3657 if(CPUID::supportsSSE2() && (width % 4) == 0) 3658 { 3659 if(internal.depth == 2) 3660 { 3661 for(int y = 0; y < height; y++) 3662 { 3663 for(int x = 0; x < width; x += 4) 3664 { 3665 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3666 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3667 3668 c0 = _mm_avg_epu8(c0, c1); 3669 3670 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3671 } 3672 3673 source0 += pitch; 3674 source1 += pitch; 3675 } 3676 } 3677 else if(internal.depth == 4) 3678 { 3679 for(int y = 0; y < height; y++) 3680 { 3681 for(int x = 0; x < width; x += 4) 3682 { 3683 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3684 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3685 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3686 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3687 3688 c0 = _mm_avg_epu8(c0, c1); 3689 c2 = _mm_avg_epu8(c2, c3); 3690 c0 = _mm_avg_epu8(c0, c2); 3691 3692 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3693 } 3694 3695 source0 += pitch; 3696 source1 += pitch; 3697 source2 += pitch; 3698 source3 += pitch; 3699 } 3700 } 3701 else if(internal.depth == 8) 3702 { 3703 for(int y = 0; y < height; y++) 3704 { 3705 for(int x = 0; x < width; x += 4) 3706 { 3707 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3708 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3709 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3710 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3711 __m128i 
c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3712 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3713 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3714 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3715 3716 c0 = _mm_avg_epu8(c0, c1); 3717 c2 = _mm_avg_epu8(c2, c3); 3718 c4 = _mm_avg_epu8(c4, c5); 3719 c6 = _mm_avg_epu8(c6, c7); 3720 c0 = _mm_avg_epu8(c0, c2); 3721 c4 = _mm_avg_epu8(c4, c6); 3722 c0 = _mm_avg_epu8(c0, c4); 3723 3724 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3725 } 3726 3727 source0 += pitch; 3728 source1 += pitch; 3729 source2 += pitch; 3730 source3 += pitch; 3731 source4 += pitch; 3732 source5 += pitch; 3733 source6 += pitch; 3734 source7 += pitch; 3735 } 3736 } 3737 else if(internal.depth == 16) 3738 { 3739 for(int y = 0; y < height; y++) 3740 { 3741 for(int x = 0; x < width; x += 4) 3742 { 3743 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3744 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3745 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3746 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3747 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 3748 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 3749 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 3750 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 3751 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 3752 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 3753 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 3754 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 3755 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 3756 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 3757 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 3758 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 3759 3760 c0 = _mm_avg_epu8(c0, c1); 3761 c2 = _mm_avg_epu8(c2, c3); 3762 c4 = _mm_avg_epu8(c4, c5); 3763 c6 = 
_mm_avg_epu8(c6, c7); 3764 c8 = _mm_avg_epu8(c8, c9); 3765 cA = _mm_avg_epu8(cA, cB); 3766 cC = _mm_avg_epu8(cC, cD); 3767 cE = _mm_avg_epu8(cE, cF); 3768 c0 = _mm_avg_epu8(c0, c2); 3769 c4 = _mm_avg_epu8(c4, c6); 3770 c8 = _mm_avg_epu8(c8, cA); 3771 cC = _mm_avg_epu8(cC, cE); 3772 c0 = _mm_avg_epu8(c0, c4); 3773 c8 = _mm_avg_epu8(c8, cC); 3774 c0 = _mm_avg_epu8(c0, c8); 3775 3776 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3777 } 3778 3779 source0 += pitch; 3780 source1 += pitch; 3781 source2 += pitch; 3782 source3 += pitch; 3783 source4 += pitch; 3784 source5 += pitch; 3785 source6 += pitch; 3786 source7 += pitch; 3787 source8 += pitch; 3788 source9 += pitch; 3789 sourceA += pitch; 3790 sourceB += pitch; 3791 sourceC += pitch; 3792 sourceD += pitch; 3793 sourceE += pitch; 3794 sourceF += pitch; 3795 } 3796 } 3797 else ASSERT(false); 3798 } 3799 else 3800 { 3801 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7F7F7F7F) + (((x) ^ (y)) & 0x01010101)) 3802 3803 if(internal.depth == 2) 3804 { 3805 for(int y = 0; y < height; y++) 3806 { 3807 for(int x = 0; x < width; x++) 3808 { 3809 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3810 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3811 3812 c0 = AVERAGE(c0, c1); 3813 3814 *(unsigned int*)(source0 + 4 * x) = c0; 3815 } 3816 3817 source0 += pitch; 3818 source1 += pitch; 3819 } 3820 } 3821 else if(internal.depth == 4) 3822 { 3823 for(int y = 0; y < height; y++) 3824 { 3825 for(int x = 0; x < width; x++) 3826 { 3827 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3828 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3829 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3830 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3831 3832 c0 = AVERAGE(c0, c1); 3833 c2 = AVERAGE(c2, c3); 3834 c0 = AVERAGE(c0, c2); 3835 3836 *(unsigned int*)(source0 + 4 * x) = c0; 3837 } 3838 3839 source0 += pitch; 3840 source1 += pitch; 3841 source2 += pitch; 3842 source3 += pitch; 3843 } 3844 } 3845 
else if(internal.depth == 8) 3846 { 3847 for(int y = 0; y < height; y++) 3848 { 3849 for(int x = 0; x < width; x++) 3850 { 3851 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3852 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3853 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3854 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3855 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3856 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3857 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3858 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3859 3860 c0 = AVERAGE(c0, c1); 3861 c2 = AVERAGE(c2, c3); 3862 c4 = AVERAGE(c4, c5); 3863 c6 = AVERAGE(c6, c7); 3864 c0 = AVERAGE(c0, c2); 3865 c4 = AVERAGE(c4, c6); 3866 c0 = AVERAGE(c0, c4); 3867 3868 *(unsigned int*)(source0 + 4 * x) = c0; 3869 } 3870 3871 source0 += pitch; 3872 source1 += pitch; 3873 source2 += pitch; 3874 source3 += pitch; 3875 source4 += pitch; 3876 source5 += pitch; 3877 source6 += pitch; 3878 source7 += pitch; 3879 } 3880 } 3881 else if(internal.depth == 16) 3882 { 3883 for(int y = 0; y < height; y++) 3884 { 3885 for(int x = 0; x < width; x++) 3886 { 3887 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 3888 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 3889 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 3890 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 3891 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 3892 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 3893 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 3894 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 3895 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 3896 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 3897 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 3898 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 3899 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 3900 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 3901 unsigned int 
cE = *(unsigned int*)(sourceE + 4 * x); 3902 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 3903 3904 c0 = AVERAGE(c0, c1); 3905 c2 = AVERAGE(c2, c3); 3906 c4 = AVERAGE(c4, c5); 3907 c6 = AVERAGE(c6, c7); 3908 c8 = AVERAGE(c8, c9); 3909 cA = AVERAGE(cA, cB); 3910 cC = AVERAGE(cC, cD); 3911 cE = AVERAGE(cE, cF); 3912 c0 = AVERAGE(c0, c2); 3913 c4 = AVERAGE(c4, c6); 3914 c8 = AVERAGE(c8, cA); 3915 cC = AVERAGE(cC, cE); 3916 c0 = AVERAGE(c0, c4); 3917 c8 = AVERAGE(c8, cC); 3918 c0 = AVERAGE(c0, c8); 3919 3920 *(unsigned int*)(source0 + 4 * x) = c0; 3921 } 3922 3923 source0 += pitch; 3924 source1 += pitch; 3925 source2 += pitch; 3926 source3 += pitch; 3927 source4 += pitch; 3928 source5 += pitch; 3929 source6 += pitch; 3930 source7 += pitch; 3931 source8 += pitch; 3932 source9 += pitch; 3933 sourceA += pitch; 3934 sourceB += pitch; 3935 sourceC += pitch; 3936 sourceD += pitch; 3937 sourceE += pitch; 3938 sourceF += pitch; 3939 } 3940 } 3941 else ASSERT(false); 3942 3943 #undef AVERAGE 3944 } 3945 } 3946 else if(internal.format == FORMAT_G16R16) 3947 { 3948 if(CPUID::supportsSSE2() && (width % 4) == 0) 3949 { 3950 if(internal.depth == 2) 3951 { 3952 for(int y = 0; y < height; y++) 3953 { 3954 for(int x = 0; x < width; x += 4) 3955 { 3956 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3957 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3958 3959 c0 = _mm_avg_epu16(c0, c1); 3960 3961 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3962 } 3963 3964 source0 += pitch; 3965 source1 += pitch; 3966 } 3967 } 3968 else if(internal.depth == 4) 3969 { 3970 for(int y = 0; y < height; y++) 3971 { 3972 for(int x = 0; x < width; x += 4) 3973 { 3974 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3975 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 3976 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 3977 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 3978 3979 c0 = _mm_avg_epu16(c0, c1); 3980 c2 = _mm_avg_epu16(c2, 
c3); 3981 c0 = _mm_avg_epu16(c0, c2); 3982 3983 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 3984 } 3985 3986 source0 += pitch; 3987 source1 += pitch; 3988 source2 += pitch; 3989 source3 += pitch; 3990 } 3991 } 3992 else if(internal.depth == 8) 3993 { 3994 for(int y = 0; y < height; y++) 3995 { 3996 for(int x = 0; x < width; x += 4) 3997 { 3998 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 3999 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4000 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4001 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4002 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4003 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4004 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4005 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4006 4007 c0 = _mm_avg_epu16(c0, c1); 4008 c2 = _mm_avg_epu16(c2, c3); 4009 c4 = _mm_avg_epu16(c4, c5); 4010 c6 = _mm_avg_epu16(c6, c7); 4011 c0 = _mm_avg_epu16(c0, c2); 4012 c4 = _mm_avg_epu16(c4, c6); 4013 c0 = _mm_avg_epu16(c0, c4); 4014 4015 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4016 } 4017 4018 source0 += pitch; 4019 source1 += pitch; 4020 source2 += pitch; 4021 source3 += pitch; 4022 source4 += pitch; 4023 source5 += pitch; 4024 source6 += pitch; 4025 source7 += pitch; 4026 } 4027 } 4028 else if(internal.depth == 16) 4029 { 4030 for(int y = 0; y < height; y++) 4031 { 4032 for(int x = 0; x < width; x += 4) 4033 { 4034 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 4 * x)); 4035 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 4 * x)); 4036 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 4 * x)); 4037 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 4 * x)); 4038 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 4 * x)); 4039 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 4 * x)); 4040 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 4 * x)); 4041 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 4 * x)); 4042 
__m128i c8 = _mm_load_si128((__m128i*)(source8 + 4 * x)); 4043 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 4 * x)); 4044 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 4 * x)); 4045 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 4 * x)); 4046 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 4 * x)); 4047 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 4 * x)); 4048 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 4 * x)); 4049 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 4 * x)); 4050 4051 c0 = _mm_avg_epu16(c0, c1); 4052 c2 = _mm_avg_epu16(c2, c3); 4053 c4 = _mm_avg_epu16(c4, c5); 4054 c6 = _mm_avg_epu16(c6, c7); 4055 c8 = _mm_avg_epu16(c8, c9); 4056 cA = _mm_avg_epu16(cA, cB); 4057 cC = _mm_avg_epu16(cC, cD); 4058 cE = _mm_avg_epu16(cE, cF); 4059 c0 = _mm_avg_epu16(c0, c2); 4060 c4 = _mm_avg_epu16(c4, c6); 4061 c8 = _mm_avg_epu16(c8, cA); 4062 cC = _mm_avg_epu16(cC, cE); 4063 c0 = _mm_avg_epu16(c0, c4); 4064 c8 = _mm_avg_epu16(c8, cC); 4065 c0 = _mm_avg_epu16(c0, c8); 4066 4067 _mm_store_si128((__m128i*)(source0 + 4 * x), c0); 4068 } 4069 4070 source0 += pitch; 4071 source1 += pitch; 4072 source2 += pitch; 4073 source3 += pitch; 4074 source4 += pitch; 4075 source5 += pitch; 4076 source6 += pitch; 4077 source7 += pitch; 4078 source8 += pitch; 4079 source9 += pitch; 4080 sourceA += pitch; 4081 sourceB += pitch; 4082 sourceC += pitch; 4083 sourceD += pitch; 4084 sourceE += pitch; 4085 sourceF += pitch; 4086 } 4087 } 4088 else ASSERT(false); 4089 } 4090 else 4091 { 4092 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4093 4094 if(internal.depth == 2) 4095 { 4096 for(int y = 0; y < height; y++) 4097 { 4098 for(int x = 0; x < width; x++) 4099 { 4100 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4101 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4102 4103 c0 = AVERAGE(c0, c1); 4104 4105 *(unsigned int*)(source0 + 4 * x) = c0; 4106 } 4107 4108 source0 += pitch; 4109 source1 += pitch; 4110 } 
4111 } 4112 else if(internal.depth == 4) 4113 { 4114 for(int y = 0; y < height; y++) 4115 { 4116 for(int x = 0; x < width; x++) 4117 { 4118 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4119 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4120 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4121 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4122 4123 c0 = AVERAGE(c0, c1); 4124 c2 = AVERAGE(c2, c3); 4125 c0 = AVERAGE(c0, c2); 4126 4127 *(unsigned int*)(source0 + 4 * x) = c0; 4128 } 4129 4130 source0 += pitch; 4131 source1 += pitch; 4132 source2 += pitch; 4133 source3 += pitch; 4134 } 4135 } 4136 else if(internal.depth == 8) 4137 { 4138 for(int y = 0; y < height; y++) 4139 { 4140 for(int x = 0; x < width; x++) 4141 { 4142 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4143 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4144 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4145 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4146 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4147 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4148 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4149 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4150 4151 c0 = AVERAGE(c0, c1); 4152 c2 = AVERAGE(c2, c3); 4153 c4 = AVERAGE(c4, c5); 4154 c6 = AVERAGE(c6, c7); 4155 c0 = AVERAGE(c0, c2); 4156 c4 = AVERAGE(c4, c6); 4157 c0 = AVERAGE(c0, c4); 4158 4159 *(unsigned int*)(source0 + 4 * x) = c0; 4160 } 4161 4162 source0 += pitch; 4163 source1 += pitch; 4164 source2 += pitch; 4165 source3 += pitch; 4166 source4 += pitch; 4167 source5 += pitch; 4168 source6 += pitch; 4169 source7 += pitch; 4170 } 4171 } 4172 else if(internal.depth == 16) 4173 { 4174 for(int y = 0; y < height; y++) 4175 { 4176 for(int x = 0; x < width; x++) 4177 { 4178 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4179 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4180 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4181 unsigned int c3 = 
*(unsigned int*)(source3 + 4 * x); 4182 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4183 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4184 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4185 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4186 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4187 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4188 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4189 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4190 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4191 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4192 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4193 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4194 4195 c0 = AVERAGE(c0, c1); 4196 c2 = AVERAGE(c2, c3); 4197 c4 = AVERAGE(c4, c5); 4198 c6 = AVERAGE(c6, c7); 4199 c8 = AVERAGE(c8, c9); 4200 cA = AVERAGE(cA, cB); 4201 cC = AVERAGE(cC, cD); 4202 cE = AVERAGE(cE, cF); 4203 c0 = AVERAGE(c0, c2); 4204 c4 = AVERAGE(c4, c6); 4205 c8 = AVERAGE(c8, cA); 4206 cC = AVERAGE(cC, cE); 4207 c0 = AVERAGE(c0, c4); 4208 c8 = AVERAGE(c8, cC); 4209 c0 = AVERAGE(c0, c8); 4210 4211 *(unsigned int*)(source0 + 4 * x) = c0; 4212 } 4213 4214 source0 += pitch; 4215 source1 += pitch; 4216 source2 += pitch; 4217 source3 += pitch; 4218 source4 += pitch; 4219 source5 += pitch; 4220 source6 += pitch; 4221 source7 += pitch; 4222 source8 += pitch; 4223 source9 += pitch; 4224 sourceA += pitch; 4225 sourceB += pitch; 4226 sourceC += pitch; 4227 sourceD += pitch; 4228 sourceE += pitch; 4229 sourceF += pitch; 4230 } 4231 } 4232 else ASSERT(false); 4233 4234 #undef AVERAGE 4235 } 4236 } 4237 else if(internal.format == FORMAT_A16B16G16R16) 4238 { 4239 if(CPUID::supportsSSE2() && (width % 2) == 0) 4240 { 4241 if(internal.depth == 2) 4242 { 4243 for(int y = 0; y < height; y++) 4244 { 4245 for(int x = 0; x < width; x += 2) 4246 { 4247 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4248 __m128i c1 = 
_mm_load_si128((__m128i*)(source1 + 8 * x)); 4249 4250 c0 = _mm_avg_epu16(c0, c1); 4251 4252 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4253 } 4254 4255 source0 += pitch; 4256 source1 += pitch; 4257 } 4258 } 4259 else if(internal.depth == 4) 4260 { 4261 for(int y = 0; y < height; y++) 4262 { 4263 for(int x = 0; x < width; x += 2) 4264 { 4265 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4266 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4267 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4268 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4269 4270 c0 = _mm_avg_epu16(c0, c1); 4271 c2 = _mm_avg_epu16(c2, c3); 4272 c0 = _mm_avg_epu16(c0, c2); 4273 4274 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4275 } 4276 4277 source0 += pitch; 4278 source1 += pitch; 4279 source2 += pitch; 4280 source3 += pitch; 4281 } 4282 } 4283 else if(internal.depth == 8) 4284 { 4285 for(int y = 0; y < height; y++) 4286 { 4287 for(int x = 0; x < width; x += 2) 4288 { 4289 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4290 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4291 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4292 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4293 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4294 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4295 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4296 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4297 4298 c0 = _mm_avg_epu16(c0, c1); 4299 c2 = _mm_avg_epu16(c2, c3); 4300 c4 = _mm_avg_epu16(c4, c5); 4301 c6 = _mm_avg_epu16(c6, c7); 4302 c0 = _mm_avg_epu16(c0, c2); 4303 c4 = _mm_avg_epu16(c4, c6); 4304 c0 = _mm_avg_epu16(c0, c4); 4305 4306 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4307 } 4308 4309 source0 += pitch; 4310 source1 += pitch; 4311 source2 += pitch; 4312 source3 += pitch; 4313 source4 += pitch; 4314 source5 += pitch; 4315 source6 += pitch; 4316 source7 += 
pitch; 4317 } 4318 } 4319 else if(internal.depth == 16) 4320 { 4321 for(int y = 0; y < height; y++) 4322 { 4323 for(int x = 0; x < width; x += 2) 4324 { 4325 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 8 * x)); 4326 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 8 * x)); 4327 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 8 * x)); 4328 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 8 * x)); 4329 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 8 * x)); 4330 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 8 * x)); 4331 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 8 * x)); 4332 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 8 * x)); 4333 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 8 * x)); 4334 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 8 * x)); 4335 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 8 * x)); 4336 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 8 * x)); 4337 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 8 * x)); 4338 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 8 * x)); 4339 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 8 * x)); 4340 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 8 * x)); 4341 4342 c0 = _mm_avg_epu16(c0, c1); 4343 c2 = _mm_avg_epu16(c2, c3); 4344 c4 = _mm_avg_epu16(c4, c5); 4345 c6 = _mm_avg_epu16(c6, c7); 4346 c8 = _mm_avg_epu16(c8, c9); 4347 cA = _mm_avg_epu16(cA, cB); 4348 cC = _mm_avg_epu16(cC, cD); 4349 cE = _mm_avg_epu16(cE, cF); 4350 c0 = _mm_avg_epu16(c0, c2); 4351 c4 = _mm_avg_epu16(c4, c6); 4352 c8 = _mm_avg_epu16(c8, cA); 4353 cC = _mm_avg_epu16(cC, cE); 4354 c0 = _mm_avg_epu16(c0, c4); 4355 c8 = _mm_avg_epu16(c8, cC); 4356 c0 = _mm_avg_epu16(c0, c8); 4357 4358 _mm_store_si128((__m128i*)(source0 + 8 * x), c0); 4359 } 4360 4361 source0 += pitch; 4362 source1 += pitch; 4363 source2 += pitch; 4364 source3 += pitch; 4365 source4 += pitch; 4366 source5 += pitch; 4367 source6 += pitch; 4368 source7 += pitch; 4369 source8 += pitch; 4370 source9 += pitch; 4371 sourceA += pitch; 4372 sourceB 
+= pitch; 4373 sourceC += pitch; 4374 sourceD += pitch; 4375 sourceE += pitch; 4376 sourceF += pitch; 4377 } 4378 } 4379 else ASSERT(false); 4380 } 4381 else 4382 { 4383 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7FFF7FFF) + (((x) ^ (y)) & 0x00010001)) 4384 4385 if(internal.depth == 2) 4386 { 4387 for(int y = 0; y < height; y++) 4388 { 4389 for(int x = 0; x < 2 * width; x++) 4390 { 4391 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4392 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4393 4394 c0 = AVERAGE(c0, c1); 4395 4396 *(unsigned int*)(source0 + 4 * x) = c0; 4397 } 4398 4399 source0 += pitch; 4400 source1 += pitch; 4401 } 4402 } 4403 else if(internal.depth == 4) 4404 { 4405 for(int y = 0; y < height; y++) 4406 { 4407 for(int x = 0; x < 2 * width; x++) 4408 { 4409 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4410 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4411 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4412 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4413 4414 c0 = AVERAGE(c0, c1); 4415 c2 = AVERAGE(c2, c3); 4416 c0 = AVERAGE(c0, c2); 4417 4418 *(unsigned int*)(source0 + 4 * x) = c0; 4419 } 4420 4421 source0 += pitch; 4422 source1 += pitch; 4423 source2 += pitch; 4424 source3 += pitch; 4425 } 4426 } 4427 else if(internal.depth == 8) 4428 { 4429 for(int y = 0; y < height; y++) 4430 { 4431 for(int x = 0; x < 2 * width; x++) 4432 { 4433 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4434 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4435 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4436 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4437 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4438 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4439 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4440 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4441 4442 c0 = AVERAGE(c0, c1); 4443 c2 = AVERAGE(c2, c3); 4444 c4 = AVERAGE(c4, c5); 4445 c6 = AVERAGE(c6, c7); 
4446 c0 = AVERAGE(c0, c2); 4447 c4 = AVERAGE(c4, c6); 4448 c0 = AVERAGE(c0, c4); 4449 4450 *(unsigned int*)(source0 + 4 * x) = c0; 4451 } 4452 4453 source0 += pitch; 4454 source1 += pitch; 4455 source2 += pitch; 4456 source3 += pitch; 4457 source4 += pitch; 4458 source5 += pitch; 4459 source6 += pitch; 4460 source7 += pitch; 4461 } 4462 } 4463 else if(internal.depth == 16) 4464 { 4465 for(int y = 0; y < height; y++) 4466 { 4467 for(int x = 0; x < 2 * width; x++) 4468 { 4469 unsigned int c0 = *(unsigned int*)(source0 + 4 * x); 4470 unsigned int c1 = *(unsigned int*)(source1 + 4 * x); 4471 unsigned int c2 = *(unsigned int*)(source2 + 4 * x); 4472 unsigned int c3 = *(unsigned int*)(source3 + 4 * x); 4473 unsigned int c4 = *(unsigned int*)(source4 + 4 * x); 4474 unsigned int c5 = *(unsigned int*)(source5 + 4 * x); 4475 unsigned int c6 = *(unsigned int*)(source6 + 4 * x); 4476 unsigned int c7 = *(unsigned int*)(source7 + 4 * x); 4477 unsigned int c8 = *(unsigned int*)(source8 + 4 * x); 4478 unsigned int c9 = *(unsigned int*)(source9 + 4 * x); 4479 unsigned int cA = *(unsigned int*)(sourceA + 4 * x); 4480 unsigned int cB = *(unsigned int*)(sourceB + 4 * x); 4481 unsigned int cC = *(unsigned int*)(sourceC + 4 * x); 4482 unsigned int cD = *(unsigned int*)(sourceD + 4 * x); 4483 unsigned int cE = *(unsigned int*)(sourceE + 4 * x); 4484 unsigned int cF = *(unsigned int*)(sourceF + 4 * x); 4485 4486 c0 = AVERAGE(c0, c1); 4487 c2 = AVERAGE(c2, c3); 4488 c4 = AVERAGE(c4, c5); 4489 c6 = AVERAGE(c6, c7); 4490 c8 = AVERAGE(c8, c9); 4491 cA = AVERAGE(cA, cB); 4492 cC = AVERAGE(cC, cD); 4493 cE = AVERAGE(cE, cF); 4494 c0 = AVERAGE(c0, c2); 4495 c4 = AVERAGE(c4, c6); 4496 c8 = AVERAGE(c8, cA); 4497 cC = AVERAGE(cC, cE); 4498 c0 = AVERAGE(c0, c4); 4499 c8 = AVERAGE(c8, cC); 4500 c0 = AVERAGE(c0, c8); 4501 4502 *(unsigned int*)(source0 + 4 * x) = c0; 4503 } 4504 4505 source0 += pitch; 4506 source1 += pitch; 4507 source2 += pitch; 4508 source3 += pitch; 4509 source4 += pitch; 4510 
source5 += pitch; 4511 source6 += pitch; 4512 source7 += pitch; 4513 source8 += pitch; 4514 source9 += pitch; 4515 sourceA += pitch; 4516 sourceB += pitch; 4517 sourceC += pitch; 4518 sourceD += pitch; 4519 sourceE += pitch; 4520 sourceF += pitch; 4521 } 4522 } 4523 else ASSERT(false); 4524 4525 #undef AVERAGE 4526 } 4527 } 4528 else if(internal.format == FORMAT_R32F) 4529 { 4530 if(CPUID::supportsSSE() && (width % 4) == 0) 4531 { 4532 if(internal.depth == 2) 4533 { 4534 for(int y = 0; y < height; y++) 4535 { 4536 for(int x = 0; x < width; x += 4) 4537 { 4538 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4539 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4540 4541 c0 = _mm_add_ps(c0, c1); 4542 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4543 4544 _mm_store_ps((float*)(source0 + 4 * x), c0); 4545 } 4546 4547 source0 += pitch; 4548 source1 += pitch; 4549 } 4550 } 4551 else if(internal.depth == 4) 4552 { 4553 for(int y = 0; y < height; y++) 4554 { 4555 for(int x = 0; x < width; x += 4) 4556 { 4557 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4558 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4559 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4560 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4561 4562 c0 = _mm_add_ps(c0, c1); 4563 c2 = _mm_add_ps(c2, c3); 4564 c0 = _mm_add_ps(c0, c2); 4565 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4566 4567 _mm_store_ps((float*)(source0 + 4 * x), c0); 4568 } 4569 4570 source0 += pitch; 4571 source1 += pitch; 4572 source2 += pitch; 4573 source3 += pitch; 4574 } 4575 } 4576 else if(internal.depth == 8) 4577 { 4578 for(int y = 0; y < height; y++) 4579 { 4580 for(int x = 0; x < width; x += 4) 4581 { 4582 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4583 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4584 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4585 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4586 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4587 __m128 c5 
= _mm_load_ps((float*)(source5 + 4 * x)); 4588 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4589 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4590 4591 c0 = _mm_add_ps(c0, c1); 4592 c2 = _mm_add_ps(c2, c3); 4593 c4 = _mm_add_ps(c4, c5); 4594 c6 = _mm_add_ps(c6, c7); 4595 c0 = _mm_add_ps(c0, c2); 4596 c4 = _mm_add_ps(c4, c6); 4597 c0 = _mm_add_ps(c0, c4); 4598 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4599 4600 _mm_store_ps((float*)(source0 + 4 * x), c0); 4601 } 4602 4603 source0 += pitch; 4604 source1 += pitch; 4605 source2 += pitch; 4606 source3 += pitch; 4607 source4 += pitch; 4608 source5 += pitch; 4609 source6 += pitch; 4610 source7 += pitch; 4611 } 4612 } 4613 else if(internal.depth == 16) 4614 { 4615 for(int y = 0; y < height; y++) 4616 { 4617 for(int x = 0; x < width; x += 4) 4618 { 4619 __m128 c0 = _mm_load_ps((float*)(source0 + 4 * x)); 4620 __m128 c1 = _mm_load_ps((float*)(source1 + 4 * x)); 4621 __m128 c2 = _mm_load_ps((float*)(source2 + 4 * x)); 4622 __m128 c3 = _mm_load_ps((float*)(source3 + 4 * x)); 4623 __m128 c4 = _mm_load_ps((float*)(source4 + 4 * x)); 4624 __m128 c5 = _mm_load_ps((float*)(source5 + 4 * x)); 4625 __m128 c6 = _mm_load_ps((float*)(source6 + 4 * x)); 4626 __m128 c7 = _mm_load_ps((float*)(source7 + 4 * x)); 4627 __m128 c8 = _mm_load_ps((float*)(source8 + 4 * x)); 4628 __m128 c9 = _mm_load_ps((float*)(source9 + 4 * x)); 4629 __m128 cA = _mm_load_ps((float*)(sourceA + 4 * x)); 4630 __m128 cB = _mm_load_ps((float*)(sourceB + 4 * x)); 4631 __m128 cC = _mm_load_ps((float*)(sourceC + 4 * x)); 4632 __m128 cD = _mm_load_ps((float*)(sourceD + 4 * x)); 4633 __m128 cE = _mm_load_ps((float*)(sourceE + 4 * x)); 4634 __m128 cF = _mm_load_ps((float*)(sourceF + 4 * x)); 4635 4636 c0 = _mm_add_ps(c0, c1); 4637 c2 = _mm_add_ps(c2, c3); 4638 c4 = _mm_add_ps(c4, c5); 4639 c6 = _mm_add_ps(c6, c7); 4640 c8 = _mm_add_ps(c8, c9); 4641 cA = _mm_add_ps(cA, cB); 4642 cC = _mm_add_ps(cC, cD); 4643 cE = _mm_add_ps(cE, cF); 4644 c0 = 
_mm_add_ps(c0, c2); 4645 c4 = _mm_add_ps(c4, c6); 4646 c8 = _mm_add_ps(c8, cA); 4647 cC = _mm_add_ps(cC, cE); 4648 c0 = _mm_add_ps(c0, c4); 4649 c8 = _mm_add_ps(c8, cC); 4650 c0 = _mm_add_ps(c0, c8); 4651 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4652 4653 _mm_store_ps((float*)(source0 + 4 * x), c0); 4654 } 4655 4656 source0 += pitch; 4657 source1 += pitch; 4658 source2 += pitch; 4659 source3 += pitch; 4660 source4 += pitch; 4661 source5 += pitch; 4662 source6 += pitch; 4663 source7 += pitch; 4664 source8 += pitch; 4665 source9 += pitch; 4666 sourceA += pitch; 4667 sourceB += pitch; 4668 sourceC += pitch; 4669 sourceD += pitch; 4670 sourceE += pitch; 4671 sourceF += pitch; 4672 } 4673 } 4674 else ASSERT(false); 4675 } 4676 else 4677 { 4678 if(internal.depth == 2) 4679 { 4680 for(int y = 0; y < height; y++) 4681 { 4682 for(int x = 0; x < width; x++) 4683 { 4684 float c0 = *(float*)(source0 + 4 * x); 4685 float c1 = *(float*)(source1 + 4 * x); 4686 4687 c0 = c0 + c1; 4688 c0 *= 1.0f / 2.0f; 4689 4690 *(float*)(source0 + 4 * x) = c0; 4691 } 4692 4693 source0 += pitch; 4694 source1 += pitch; 4695 } 4696 } 4697 else if(internal.depth == 4) 4698 { 4699 for(int y = 0; y < height; y++) 4700 { 4701 for(int x = 0; x < width; x++) 4702 { 4703 float c0 = *(float*)(source0 + 4 * x); 4704 float c1 = *(float*)(source1 + 4 * x); 4705 float c2 = *(float*)(source2 + 4 * x); 4706 float c3 = *(float*)(source3 + 4 * x); 4707 4708 c0 = c0 + c1; 4709 c2 = c2 + c3; 4710 c0 = c0 + c2; 4711 c0 *= 1.0f / 4.0f; 4712 4713 *(float*)(source0 + 4 * x) = c0; 4714 } 4715 4716 source0 += pitch; 4717 source1 += pitch; 4718 source2 += pitch; 4719 source3 += pitch; 4720 } 4721 } 4722 else if(internal.depth == 8) 4723 { 4724 for(int y = 0; y < height; y++) 4725 { 4726 for(int x = 0; x < width; x++) 4727 { 4728 float c0 = *(float*)(source0 + 4 * x); 4729 float c1 = *(float*)(source1 + 4 * x); 4730 float c2 = *(float*)(source2 + 4 * x); 4731 float c3 = *(float*)(source3 + 4 * x); 4732 float c4 = 
*(float*)(source4 + 4 * x); 4733 float c5 = *(float*)(source5 + 4 * x); 4734 float c6 = *(float*)(source6 + 4 * x); 4735 float c7 = *(float*)(source7 + 4 * x); 4736 4737 c0 = c0 + c1; 4738 c2 = c2 + c3; 4739 c4 = c4 + c5; 4740 c6 = c6 + c7; 4741 c0 = c0 + c2; 4742 c4 = c4 + c6; 4743 c0 = c0 + c4; 4744 c0 *= 1.0f / 8.0f; 4745 4746 *(float*)(source0 + 4 * x) = c0; 4747 } 4748 4749 source0 += pitch; 4750 source1 += pitch; 4751 source2 += pitch; 4752 source3 += pitch; 4753 source4 += pitch; 4754 source5 += pitch; 4755 source6 += pitch; 4756 source7 += pitch; 4757 } 4758 } 4759 else if(internal.depth == 16) 4760 { 4761 for(int y = 0; y < height; y++) 4762 { 4763 for(int x = 0; x < width; x++) 4764 { 4765 float c0 = *(float*)(source0 + 4 * x); 4766 float c1 = *(float*)(source1 + 4 * x); 4767 float c2 = *(float*)(source2 + 4 * x); 4768 float c3 = *(float*)(source3 + 4 * x); 4769 float c4 = *(float*)(source4 + 4 * x); 4770 float c5 = *(float*)(source5 + 4 * x); 4771 float c6 = *(float*)(source6 + 4 * x); 4772 float c7 = *(float*)(source7 + 4 * x); 4773 float c8 = *(float*)(source8 + 4 * x); 4774 float c9 = *(float*)(source9 + 4 * x); 4775 float cA = *(float*)(sourceA + 4 * x); 4776 float cB = *(float*)(sourceB + 4 * x); 4777 float cC = *(float*)(sourceC + 4 * x); 4778 float cD = *(float*)(sourceD + 4 * x); 4779 float cE = *(float*)(sourceE + 4 * x); 4780 float cF = *(float*)(sourceF + 4 * x); 4781 4782 c0 = c0 + c1; 4783 c2 = c2 + c3; 4784 c4 = c4 + c5; 4785 c6 = c6 + c7; 4786 c8 = c8 + c9; 4787 cA = cA + cB; 4788 cC = cC + cD; 4789 cE = cE + cF; 4790 c0 = c0 + c2; 4791 c4 = c4 + c6; 4792 c8 = c8 + cA; 4793 cC = cC + cE; 4794 c0 = c0 + c4; 4795 c8 = c8 + cC; 4796 c0 = c0 + c8; 4797 c0 *= 1.0f / 16.0f; 4798 4799 *(float*)(source0 + 4 * x) = c0; 4800 } 4801 4802 source0 += pitch; 4803 source1 += pitch; 4804 source2 += pitch; 4805 source3 += pitch; 4806 source4 += pitch; 4807 source5 += pitch; 4808 source6 += pitch; 4809 source7 += pitch; 4810 source8 += pitch; 4811 source9 
+= pitch; 4812 sourceA += pitch; 4813 sourceB += pitch; 4814 sourceC += pitch; 4815 sourceD += pitch; 4816 sourceE += pitch; 4817 sourceF += pitch; 4818 } 4819 } 4820 else ASSERT(false); 4821 } 4822 } 4823 else if(internal.format == FORMAT_G32R32F) 4824 { 4825 if(CPUID::supportsSSE() && (width % 2) == 0) 4826 { 4827 if(internal.depth == 2) 4828 { 4829 for(int y = 0; y < height; y++) 4830 { 4831 for(int x = 0; x < width; x += 2) 4832 { 4833 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4834 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4835 4836 c0 = _mm_add_ps(c0, c1); 4837 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 4838 4839 _mm_store_ps((float*)(source0 + 8 * x), c0); 4840 } 4841 4842 source0 += pitch; 4843 source1 += pitch; 4844 } 4845 } 4846 else if(internal.depth == 4) 4847 { 4848 for(int y = 0; y < height; y++) 4849 { 4850 for(int x = 0; x < width; x += 2) 4851 { 4852 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4853 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4854 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4855 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4856 4857 c0 = _mm_add_ps(c0, c1); 4858 c2 = _mm_add_ps(c2, c3); 4859 c0 = _mm_add_ps(c0, c2); 4860 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 4861 4862 _mm_store_ps((float*)(source0 + 8 * x), c0); 4863 } 4864 4865 source0 += pitch; 4866 source1 += pitch; 4867 source2 += pitch; 4868 source3 += pitch; 4869 } 4870 } 4871 else if(internal.depth == 8) 4872 { 4873 for(int y = 0; y < height; y++) 4874 { 4875 for(int x = 0; x < width; x += 2) 4876 { 4877 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4878 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4879 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4880 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4881 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4882 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4883 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4884 __m128 c7 = 
_mm_load_ps((float*)(source7 + 8 * x)); 4885 4886 c0 = _mm_add_ps(c0, c1); 4887 c2 = _mm_add_ps(c2, c3); 4888 c4 = _mm_add_ps(c4, c5); 4889 c6 = _mm_add_ps(c6, c7); 4890 c0 = _mm_add_ps(c0, c2); 4891 c4 = _mm_add_ps(c4, c6); 4892 c0 = _mm_add_ps(c0, c4); 4893 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 4894 4895 _mm_store_ps((float*)(source0 + 8 * x), c0); 4896 } 4897 4898 source0 += pitch; 4899 source1 += pitch; 4900 source2 += pitch; 4901 source3 += pitch; 4902 source4 += pitch; 4903 source5 += pitch; 4904 source6 += pitch; 4905 source7 += pitch; 4906 } 4907 } 4908 else if(internal.depth == 16) 4909 { 4910 for(int y = 0; y < height; y++) 4911 { 4912 for(int x = 0; x < width; x += 2) 4913 { 4914 __m128 c0 = _mm_load_ps((float*)(source0 + 8 * x)); 4915 __m128 c1 = _mm_load_ps((float*)(source1 + 8 * x)); 4916 __m128 c2 = _mm_load_ps((float*)(source2 + 8 * x)); 4917 __m128 c3 = _mm_load_ps((float*)(source3 + 8 * x)); 4918 __m128 c4 = _mm_load_ps((float*)(source4 + 8 * x)); 4919 __m128 c5 = _mm_load_ps((float*)(source5 + 8 * x)); 4920 __m128 c6 = _mm_load_ps((float*)(source6 + 8 * x)); 4921 __m128 c7 = _mm_load_ps((float*)(source7 + 8 * x)); 4922 __m128 c8 = _mm_load_ps((float*)(source8 + 8 * x)); 4923 __m128 c9 = _mm_load_ps((float*)(source9 + 8 * x)); 4924 __m128 cA = _mm_load_ps((float*)(sourceA + 8 * x)); 4925 __m128 cB = _mm_load_ps((float*)(sourceB + 8 * x)); 4926 __m128 cC = _mm_load_ps((float*)(sourceC + 8 * x)); 4927 __m128 cD = _mm_load_ps((float*)(sourceD + 8 * x)); 4928 __m128 cE = _mm_load_ps((float*)(sourceE + 8 * x)); 4929 __m128 cF = _mm_load_ps((float*)(sourceF + 8 * x)); 4930 4931 c0 = _mm_add_ps(c0, c1); 4932 c2 = _mm_add_ps(c2, c3); 4933 c4 = _mm_add_ps(c4, c5); 4934 c6 = _mm_add_ps(c6, c7); 4935 c8 = _mm_add_ps(c8, c9); 4936 cA = _mm_add_ps(cA, cB); 4937 cC = _mm_add_ps(cC, cD); 4938 cE = _mm_add_ps(cE, cF); 4939 c0 = _mm_add_ps(c0, c2); 4940 c4 = _mm_add_ps(c4, c6); 4941 c8 = _mm_add_ps(c8, cA); 4942 cC = _mm_add_ps(cC, cE); 4943 c0 = 
_mm_add_ps(c0, c4); 4944 c8 = _mm_add_ps(c8, cC); 4945 c0 = _mm_add_ps(c0, c8); 4946 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 16.0f)); 4947 4948 _mm_store_ps((float*)(source0 + 8 * x), c0); 4949 } 4950 4951 source0 += pitch; 4952 source1 += pitch; 4953 source2 += pitch; 4954 source3 += pitch; 4955 source4 += pitch; 4956 source5 += pitch; 4957 source6 += pitch; 4958 source7 += pitch; 4959 source8 += pitch; 4960 source9 += pitch; 4961 sourceA += pitch; 4962 sourceB += pitch; 4963 sourceC += pitch; 4964 sourceD += pitch; 4965 sourceE += pitch; 4966 sourceF += pitch; 4967 } 4968 } 4969 else ASSERT(false); 4970 } 4971 else 4972 { 4973 if(internal.depth == 2) 4974 { 4975 for(int y = 0; y < height; y++) 4976 { 4977 for(int x = 0; x < 2 * width; x++) 4978 { 4979 float c0 = *(float*)(source0 + 4 * x); 4980 float c1 = *(float*)(source1 + 4 * x); 4981 4982 c0 = c0 + c1; 4983 c0 *= 1.0f / 2.0f; 4984 4985 *(float*)(source0 + 4 * x) = c0; 4986 } 4987 4988 source0 += pitch; 4989 source1 += pitch; 4990 } 4991 } 4992 else if(internal.depth == 4) 4993 { 4994 for(int y = 0; y < height; y++) 4995 { 4996 for(int x = 0; x < 2 * width; x++) 4997 { 4998 float c0 = *(float*)(source0 + 4 * x); 4999 float c1 = *(float*)(source1 + 4 * x); 5000 float c2 = *(float*)(source2 + 4 * x); 5001 float c3 = *(float*)(source3 + 4 * x); 5002 5003 c0 = c0 + c1; 5004 c2 = c2 + c3; 5005 c0 = c0 + c2; 5006 c0 *= 1.0f / 4.0f; 5007 5008 *(float*)(source0 + 4 * x) = c0; 5009 } 5010 5011 source0 += pitch; 5012 source1 += pitch; 5013 source2 += pitch; 5014 source3 += pitch; 5015 } 5016 } 5017 else if(internal.depth == 8) 5018 { 5019 for(int y = 0; y < height; y++) 5020 { 5021 for(int x = 0; x < 2 * width; x++) 5022 { 5023 float c0 = *(float*)(source0 + 4 * x); 5024 float c1 = *(float*)(source1 + 4 * x); 5025 float c2 = *(float*)(source2 + 4 * x); 5026 float c3 = *(float*)(source3 + 4 * x); 5027 float c4 = *(float*)(source4 + 4 * x); 5028 float c5 = *(float*)(source5 + 4 * x); 5029 float c6 = *(float*)(source6 + 4 * 
x); 5030 float c7 = *(float*)(source7 + 4 * x); 5031 5032 c0 = c0 + c1; 5033 c2 = c2 + c3; 5034 c4 = c4 + c5; 5035 c6 = c6 + c7; 5036 c0 = c0 + c2; 5037 c4 = c4 + c6; 5038 c0 = c0 + c4; 5039 c0 *= 1.0f / 8.0f; 5040 5041 *(float*)(source0 + 4 * x) = c0; 5042 } 5043 5044 source0 += pitch; 5045 source1 += pitch; 5046 source2 += pitch; 5047 source3 += pitch; 5048 source4 += pitch; 5049 source5 += pitch; 5050 source6 += pitch; 5051 source7 += pitch; 5052 } 5053 } 5054 else if(internal.depth == 16) 5055 { 5056 for(int y = 0; y < height; y++) 5057 { 5058 for(int x = 0; x < 2 * width; x++) 5059 { 5060 float c0 = *(float*)(source0 + 4 * x); 5061 float c1 = *(float*)(source1 + 4 * x); 5062 float c2 = *(float*)(source2 + 4 * x); 5063 float c3 = *(float*)(source3 + 4 * x); 5064 float c4 = *(float*)(source4 + 4 * x); 5065 float c5 = *(float*)(source5 + 4 * x); 5066 float c6 = *(float*)(source6 + 4 * x); 5067 float c7 = *(float*)(source7 + 4 * x); 5068 float c8 = *(float*)(source8 + 4 * x); 5069 float c9 = *(float*)(source9 + 4 * x); 5070 float cA = *(float*)(sourceA + 4 * x); 5071 float cB = *(float*)(sourceB + 4 * x); 5072 float cC = *(float*)(sourceC + 4 * x); 5073 float cD = *(float*)(sourceD + 4 * x); 5074 float cE = *(float*)(sourceE + 4 * x); 5075 float cF = *(float*)(sourceF + 4 * x); 5076 5077 c0 = c0 + c1; 5078 c2 = c2 + c3; 5079 c4 = c4 + c5; 5080 c6 = c6 + c7; 5081 c8 = c8 + c9; 5082 cA = cA + cB; 5083 cC = cC + cD; 5084 cE = cE + cF; 5085 c0 = c0 + c2; 5086 c4 = c4 + c6; 5087 c8 = c8 + cA; 5088 cC = cC + cE; 5089 c0 = c0 + c4; 5090 c8 = c8 + cC; 5091 c0 = c0 + c8; 5092 c0 *= 1.0f / 16.0f; 5093 5094 *(float*)(source0 + 4 * x) = c0; 5095 } 5096 5097 source0 += pitch; 5098 source1 += pitch; 5099 source2 += pitch; 5100 source3 += pitch; 5101 source4 += pitch; 5102 source5 += pitch; 5103 source6 += pitch; 5104 source7 += pitch; 5105 source8 += pitch; 5106 source9 += pitch; 5107 sourceA += pitch; 5108 sourceB += pitch; 5109 sourceC += pitch; 5110 sourceD += pitch; 5111 
sourceE += pitch; 5112 sourceF += pitch; 5113 } 5114 } 5115 else ASSERT(false); 5116 } 5117 } 5118 else if(internal.format == FORMAT_A32B32G32R32F) 5119 { 5120 if(CPUID::supportsSSE()) 5121 { 5122 if(internal.depth == 2) 5123 { 5124 for(int y = 0; y < height; y++) 5125 { 5126 for(int x = 0; x < width; x++) 5127 { 5128 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5129 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5130 5131 c0 = _mm_add_ps(c0, c1); 5132 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 2.0f)); 5133 5134 _mm_store_ps((float*)(source0 + 16 * x), c0); 5135 } 5136 5137 source0 += pitch; 5138 source1 += pitch; 5139 } 5140 } 5141 else if(internal.depth == 4) 5142 { 5143 for(int y = 0; y < height; y++) 5144 { 5145 for(int x = 0; x < width; x++) 5146 { 5147 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5148 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5149 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5150 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5151 5152 c0 = _mm_add_ps(c0, c1); 5153 c2 = _mm_add_ps(c2, c3); 5154 c0 = _mm_add_ps(c0, c2); 5155 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 4.0f)); 5156 5157 _mm_store_ps((float*)(source0 + 16 * x), c0); 5158 } 5159 5160 source0 += pitch; 5161 source1 += pitch; 5162 source2 += pitch; 5163 source3 += pitch; 5164 } 5165 } 5166 else if(internal.depth == 8) 5167 { 5168 for(int y = 0; y < height; y++) 5169 { 5170 for(int x = 0; x < width; x++) 5171 { 5172 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5173 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5174 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5175 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5176 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5177 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5178 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5179 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5180 5181 c0 = _mm_add_ps(c0, c1); 5182 c2 = _mm_add_ps(c2, c3); 5183 c4 = 
_mm_add_ps(c4, c5); 5184 c6 = _mm_add_ps(c6, c7); 5185 c0 = _mm_add_ps(c0, c2); 5186 c4 = _mm_add_ps(c4, c6); 5187 c0 = _mm_add_ps(c0, c4); 5188 c0 = _mm_mul_ps(c0, _mm_set1_ps(1.0f / 8.0f)); 5189 5190 _mm_store_ps((float*)(source0 + 16 * x), c0); 5191 } 5192 5193 source0 += pitch; 5194 source1 += pitch; 5195 source2 += pitch; 5196 source3 += pitch; 5197 source4 += pitch; 5198 source5 += pitch; 5199 source6 += pitch; 5200 source7 += pitch; 5201 } 5202 } 5203 else if(internal.depth == 16) 5204 { 5205 for(int y = 0; y < height; y++) 5206 { 5207 for(int x = 0; x < width; x++) 5208 { 5209 __m128 c0 = _mm_load_ps((float*)(source0 + 16 * x)); 5210 __m128 c1 = _mm_load_ps((float*)(source1 + 16 * x)); 5211 __m128 c2 = _mm_load_ps((float*)(source2 + 16 * x)); 5212 __m128 c3 = _mm_load_ps((float*)(source3 + 16 * x)); 5213 __m128 c4 = _mm_load_ps((float*)(source4 + 16 * x)); 5214 __m128 c5 = _mm_load_ps((float*)(source5 + 16 * x)); 5215 __m128 c6 = _mm_load_ps((float*)(source6 + 16 * x)); 5216 __m128 c7 = _mm_load_ps((float*)(source7 + 16 * x)); 5217 __m128 c8 = _mm_load_ps((float*)(source8 + 16 * x)); 5218 __m128 c9 = _mm_load_ps((float*)(source9 + 16 * x)); 5219 __m128 cA = _mm_load_ps((float*)(sourceA + 16 * x)); 5220 __m128 cB = _mm_load_ps((float*)(sourceB + 16 * x)); 5221 __m128 cC = _mm_load_ps((float*)(sourceC + 16 * x)); 5222 __m128 cD = _mm_load_ps((float*)(sourceD + 16 * x)); 5223 __m128 cE = _mm_load_ps((float*)(sourceE + 16 * x)); 5224 __m128 cF = _mm_load_ps((float*)(sourceF + 16 * x)); 5225 5226 c0 = _mm_add_ps(c0, c1); 5227 c2 = _mm_add_ps(c2, c3); 5228 c4 = _mm_add_ps(c4, c5); 5229 c6 = _mm_add_ps(c6, c7); 5230 c8 = _mm_add_ps(c8, c9); 5231 cA = _mm_add_ps(cA, cB); 5232 cC = _mm_add_ps(cC, cD); 5233 cE = _mm_add_ps(cE, cF); 5234 c0 = _mm_add_ps(c0, c2); 5235 c4 = _mm_add_ps(c4, c6); 5236 c8 = _mm_add_ps(c8, cA); 5237 cC = _mm_add_ps(cC, cE); 5238 c0 = _mm_add_ps(c0, c4); 5239 c8 = _mm_add_ps(c8, cC); 5240 c0 = _mm_add_ps(c0, c8); 5241 c0 = _mm_mul_ps(c0, 
_mm_set1_ps(1.0f / 16.0f)); 5242 5243 _mm_store_ps((float*)(source0 + 16 * x), c0); 5244 } 5245 5246 source0 += pitch; 5247 source1 += pitch; 5248 source2 += pitch; 5249 source3 += pitch; 5250 source4 += pitch; 5251 source5 += pitch; 5252 source6 += pitch; 5253 source7 += pitch; 5254 source8 += pitch; 5255 source9 += pitch; 5256 sourceA += pitch; 5257 sourceB += pitch; 5258 sourceC += pitch; 5259 sourceD += pitch; 5260 sourceE += pitch; 5261 sourceF += pitch; 5262 } 5263 } 5264 else ASSERT(false); 5265 } 5266 else 5267 { 5268 if(internal.depth == 2) 5269 { 5270 for(int y = 0; y < height; y++) 5271 { 5272 for(int x = 0; x < 4 * width; x++) 5273 { 5274 float c0 = *(float*)(source0 + 4 * x); 5275 float c1 = *(float*)(source1 + 4 * x); 5276 5277 c0 = c0 + c1; 5278 c0 *= 1.0f / 2.0f; 5279 5280 *(float*)(source0 + 4 * x) = c0; 5281 } 5282 5283 source0 += pitch; 5284 source1 += pitch; 5285 } 5286 } 5287 else if(internal.depth == 4) 5288 { 5289 for(int y = 0; y < height; y++) 5290 { 5291 for(int x = 0; x < 4 * width; x++) 5292 { 5293 float c0 = *(float*)(source0 + 4 * x); 5294 float c1 = *(float*)(source1 + 4 * x); 5295 float c2 = *(float*)(source2 + 4 * x); 5296 float c3 = *(float*)(source3 + 4 * x); 5297 5298 c0 = c0 + c1; 5299 c2 = c2 + c3; 5300 c0 = c0 + c2; 5301 c0 *= 1.0f / 4.0f; 5302 5303 *(float*)(source0 + 4 * x) = c0; 5304 } 5305 5306 source0 += pitch; 5307 source1 += pitch; 5308 source2 += pitch; 5309 source3 += pitch; 5310 } 5311 } 5312 else if(internal.depth == 8) 5313 { 5314 for(int y = 0; y < height; y++) 5315 { 5316 for(int x = 0; x < 4 * width; x++) 5317 { 5318 float c0 = *(float*)(source0 + 4 * x); 5319 float c1 = *(float*)(source1 + 4 * x); 5320 float c2 = *(float*)(source2 + 4 * x); 5321 float c3 = *(float*)(source3 + 4 * x); 5322 float c4 = *(float*)(source4 + 4 * x); 5323 float c5 = *(float*)(source5 + 4 * x); 5324 float c6 = *(float*)(source6 + 4 * x); 5325 float c7 = *(float*)(source7 + 4 * x); 5326 5327 c0 = c0 + c1; 5328 c2 = c2 + c3; 5329 c4 = c4 
+ c5; 5330 c6 = c6 + c7; 5331 c0 = c0 + c2; 5332 c4 = c4 + c6; 5333 c0 = c0 + c4; 5334 c0 *= 1.0f / 8.0f; 5335 5336 *(float*)(source0 + 4 * x) = c0; 5337 } 5338 5339 source0 += pitch; 5340 source1 += pitch; 5341 source2 += pitch; 5342 source3 += pitch; 5343 source4 += pitch; 5344 source5 += pitch; 5345 source6 += pitch; 5346 source7 += pitch; 5347 } 5348 } 5349 else if(internal.depth == 16) 5350 { 5351 for(int y = 0; y < height; y++) 5352 { 5353 for(int x = 0; x < 4 * width; x++) 5354 { 5355 float c0 = *(float*)(source0 + 4 * x); 5356 float c1 = *(float*)(source1 + 4 * x); 5357 float c2 = *(float*)(source2 + 4 * x); 5358 float c3 = *(float*)(source3 + 4 * x); 5359 float c4 = *(float*)(source4 + 4 * x); 5360 float c5 = *(float*)(source5 + 4 * x); 5361 float c6 = *(float*)(source6 + 4 * x); 5362 float c7 = *(float*)(source7 + 4 * x); 5363 float c8 = *(float*)(source8 + 4 * x); 5364 float c9 = *(float*)(source9 + 4 * x); 5365 float cA = *(float*)(sourceA + 4 * x); 5366 float cB = *(float*)(sourceB + 4 * x); 5367 float cC = *(float*)(sourceC + 4 * x); 5368 float cD = *(float*)(sourceD + 4 * x); 5369 float cE = *(float*)(sourceE + 4 * x); 5370 float cF = *(float*)(sourceF + 4 * x); 5371 5372 c0 = c0 + c1; 5373 c2 = c2 + c3; 5374 c4 = c4 + c5; 5375 c6 = c6 + c7; 5376 c8 = c8 + c9; 5377 cA = cA + cB; 5378 cC = cC + cD; 5379 cE = cE + cF; 5380 c0 = c0 + c2; 5381 c4 = c4 + c6; 5382 c8 = c8 + cA; 5383 cC = cC + cE; 5384 c0 = c0 + c4; 5385 c8 = c8 + cC; 5386 c0 = c0 + c8; 5387 c0 *= 1.0f / 16.0f; 5388 5389 *(float*)(source0 + 4 * x) = c0; 5390 } 5391 5392 source0 += pitch; 5393 source1 += pitch; 5394 source2 += pitch; 5395 source3 += pitch; 5396 source4 += pitch; 5397 source5 += pitch; 5398 source6 += pitch; 5399 source7 += pitch; 5400 source8 += pitch; 5401 source9 += pitch; 5402 sourceA += pitch; 5403 sourceB += pitch; 5404 sourceC += pitch; 5405 sourceD += pitch; 5406 sourceE += pitch; 5407 sourceF += pitch; 5408 } 5409 } 5410 else ASSERT(false); 5411 } 5412 } 5413 else 
if(internal.format == FORMAT_R5G6B5) 5414 { 5415 if(CPUID::supportsSSE2() && (width % 8) == 0) 5416 { 5417 if(internal.depth == 2) 5418 { 5419 for(int y = 0; y < height; y++) 5420 { 5421 for(int x = 0; x < width; x += 8) 5422 { 5423 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5424 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5425 5426 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5427 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5428 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5429 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5430 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5431 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5432 5433 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5434 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5435 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5436 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5437 c0 = _mm_or_si128(c0, c1); 5438 5439 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5440 } 5441 5442 source0 += pitch; 5443 source1 += pitch; 5444 } 5445 } 5446 else if(internal.depth == 4) 5447 { 5448 for(int y = 0; y < height; y++) 5449 { 5450 for(int x = 0; x < width; x += 8) 5451 { 5452 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5453 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5454 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5455 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5456 5457 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5458 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5459 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5460 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const 
__m128i&>(_g_)); 5461 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5462 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5463 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5464 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5465 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5466 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5467 5468 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5469 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5470 c0 = _mm_avg_epu8(c0, c2); 5471 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5472 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5473 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5474 c1 = _mm_avg_epu16(c1, c3); 5475 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5476 c0 = _mm_or_si128(c0, c1); 5477 5478 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5479 } 5480 5481 source0 += pitch; 5482 source1 += pitch; 5483 source2 += pitch; 5484 source3 += pitch; 5485 } 5486 } 5487 else if(internal.depth == 8) 5488 { 5489 for(int y = 0; y < height; y++) 5490 { 5491 for(int x = 0; x < width; x += 8) 5492 { 5493 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5494 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5495 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5496 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5497 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5498 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5499 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5500 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5501 5502 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5503 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5504 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5505 
__m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5506 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5507 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5508 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5509 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5510 __m128i c3_r_b = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5511 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5512 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5513 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5514 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5515 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5516 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5517 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5518 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5519 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5520 5521 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5522 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5523 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5524 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5525 c0 = _mm_avg_epu8(c0, c2); 5526 c4 = _mm_avg_epu8(c4, c6); 5527 c0 = _mm_avg_epu8(c0, c4); 5528 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5529 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5530 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5531 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5532 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5533 c1 = _mm_avg_epu16(c1, c3); 5534 c5 = _mm_avg_epu16(c5, c7); 5535 c1 = _mm_avg_epu16(c1, c5); 5536 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5537 c0 = _mm_or_si128(c0, c1); 5538 5539 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5540 } 5541 5542 source0 += pitch; 5543 source1 += 
pitch; 5544 source2 += pitch; 5545 source3 += pitch; 5546 source4 += pitch; 5547 source5 += pitch; 5548 source6 += pitch; 5549 source7 += pitch; 5550 } 5551 } 5552 else if(internal.depth == 16) 5553 { 5554 for(int y = 0; y < height; y++) 5555 { 5556 for(int x = 0; x < width; x += 8) 5557 { 5558 __m128i c0 = _mm_load_si128((__m128i*)(source0 + 2 * x)); 5559 __m128i c1 = _mm_load_si128((__m128i*)(source1 + 2 * x)); 5560 __m128i c2 = _mm_load_si128((__m128i*)(source2 + 2 * x)); 5561 __m128i c3 = _mm_load_si128((__m128i*)(source3 + 2 * x)); 5562 __m128i c4 = _mm_load_si128((__m128i*)(source4 + 2 * x)); 5563 __m128i c5 = _mm_load_si128((__m128i*)(source5 + 2 * x)); 5564 __m128i c6 = _mm_load_si128((__m128i*)(source6 + 2 * x)); 5565 __m128i c7 = _mm_load_si128((__m128i*)(source7 + 2 * x)); 5566 __m128i c8 = _mm_load_si128((__m128i*)(source8 + 2 * x)); 5567 __m128i c9 = _mm_load_si128((__m128i*)(source9 + 2 * x)); 5568 __m128i cA = _mm_load_si128((__m128i*)(sourceA + 2 * x)); 5569 __m128i cB = _mm_load_si128((__m128i*)(sourceB + 2 * x)); 5570 __m128i cC = _mm_load_si128((__m128i*)(sourceC + 2 * x)); 5571 __m128i cD = _mm_load_si128((__m128i*)(sourceD + 2 * x)); 5572 __m128i cE = _mm_load_si128((__m128i*)(sourceE + 2 * x)); 5573 __m128i cF = _mm_load_si128((__m128i*)(sourceF + 2 * x)); 5574 5575 static const ushort8 r_b = {0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F, 0xF81F}; 5576 static const ushort8 _g_ = {0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0, 0x07E0}; 5577 __m128i c0_r_b = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5578 __m128i c0__g_ = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(_g_)); 5579 __m128i c1_r_b = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(r_b)); 5580 __m128i c1__g_ = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5581 __m128i c2_r_b = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(r_b)); 5582 __m128i c2__g_ = _mm_and_si128(c2, reinterpret_cast<const __m128i&>(_g_)); 5583 __m128i c3_r_b 
= _mm_and_si128(c3, reinterpret_cast<const __m128i&>(r_b)); 5584 __m128i c3__g_ = _mm_and_si128(c3, reinterpret_cast<const __m128i&>(_g_)); 5585 __m128i c4_r_b = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(r_b)); 5586 __m128i c4__g_ = _mm_and_si128(c4, reinterpret_cast<const __m128i&>(_g_)); 5587 __m128i c5_r_b = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(r_b)); 5588 __m128i c5__g_ = _mm_and_si128(c5, reinterpret_cast<const __m128i&>(_g_)); 5589 __m128i c6_r_b = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(r_b)); 5590 __m128i c6__g_ = _mm_and_si128(c6, reinterpret_cast<const __m128i&>(_g_)); 5591 __m128i c7_r_b = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(r_b)); 5592 __m128i c7__g_ = _mm_and_si128(c7, reinterpret_cast<const __m128i&>(_g_)); 5593 __m128i c8_r_b = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(r_b)); 5594 __m128i c8__g_ = _mm_and_si128(c8, reinterpret_cast<const __m128i&>(_g_)); 5595 __m128i c9_r_b = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(r_b)); 5596 __m128i c9__g_ = _mm_and_si128(c9, reinterpret_cast<const __m128i&>(_g_)); 5597 __m128i cA_r_b = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(r_b)); 5598 __m128i cA__g_ = _mm_and_si128(cA, reinterpret_cast<const __m128i&>(_g_)); 5599 __m128i cB_r_b = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(r_b)); 5600 __m128i cB__g_ = _mm_and_si128(cB, reinterpret_cast<const __m128i&>(_g_)); 5601 __m128i cC_r_b = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(r_b)); 5602 __m128i cC__g_ = _mm_and_si128(cC, reinterpret_cast<const __m128i&>(_g_)); 5603 __m128i cD_r_b = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(r_b)); 5604 __m128i cD__g_ = _mm_and_si128(cD, reinterpret_cast<const __m128i&>(_g_)); 5605 __m128i cE_r_b = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(r_b)); 5606 __m128i cE__g_ = _mm_and_si128(cE, reinterpret_cast<const __m128i&>(_g_)); 5607 __m128i cF_r_b = _mm_and_si128(cF, reinterpret_cast<const __m128i&>(r_b)); 5608 __m128i cF__g_ 
= _mm_and_si128(cF, reinterpret_cast<const __m128i&>(_g_)); 5609 5610 c0 = _mm_avg_epu8(c0_r_b, c1_r_b); 5611 c2 = _mm_avg_epu8(c2_r_b, c3_r_b); 5612 c4 = _mm_avg_epu8(c4_r_b, c5_r_b); 5613 c6 = _mm_avg_epu8(c6_r_b, c7_r_b); 5614 c8 = _mm_avg_epu8(c8_r_b, c9_r_b); 5615 cA = _mm_avg_epu8(cA_r_b, cB_r_b); 5616 cC = _mm_avg_epu8(cC_r_b, cD_r_b); 5617 cE = _mm_avg_epu8(cE_r_b, cF_r_b); 5618 c0 = _mm_avg_epu8(c0, c2); 5619 c4 = _mm_avg_epu8(c4, c6); 5620 c8 = _mm_avg_epu8(c8, cA); 5621 cC = _mm_avg_epu8(cC, cE); 5622 c0 = _mm_avg_epu8(c0, c4); 5623 c8 = _mm_avg_epu8(c8, cC); 5624 c0 = _mm_avg_epu8(c0, c8); 5625 c0 = _mm_and_si128(c0, reinterpret_cast<const __m128i&>(r_b)); 5626 c1 = _mm_avg_epu16(c0__g_, c1__g_); 5627 c3 = _mm_avg_epu16(c2__g_, c3__g_); 5628 c5 = _mm_avg_epu16(c4__g_, c5__g_); 5629 c7 = _mm_avg_epu16(c6__g_, c7__g_); 5630 c9 = _mm_avg_epu16(c8__g_, c9__g_); 5631 cB = _mm_avg_epu16(cA__g_, cB__g_); 5632 cD = _mm_avg_epu16(cC__g_, cD__g_); 5633 cF = _mm_avg_epu16(cE__g_, cF__g_); 5634 c1 = _mm_avg_epu8(c1, c3); 5635 c5 = _mm_avg_epu8(c5, c7); 5636 c9 = _mm_avg_epu8(c9, cB); 5637 cD = _mm_avg_epu8(cD, cF); 5638 c1 = _mm_avg_epu8(c1, c5); 5639 c9 = _mm_avg_epu8(c9, cD); 5640 c1 = _mm_avg_epu8(c1, c9); 5641 c1 = _mm_and_si128(c1, reinterpret_cast<const __m128i&>(_g_)); 5642 c0 = _mm_or_si128(c0, c1); 5643 5644 _mm_store_si128((__m128i*)(source0 + 2 * x), c0); 5645 } 5646 5647 source0 += pitch; 5648 source1 += pitch; 5649 source2 += pitch; 5650 source3 += pitch; 5651 source4 += pitch; 5652 source5 += pitch; 5653 source6 += pitch; 5654 source7 += pitch; 5655 source8 += pitch; 5656 source9 += pitch; 5657 sourceA += pitch; 5658 sourceB += pitch; 5659 sourceC += pitch; 5660 sourceD += pitch; 5661 sourceE += pitch; 5662 sourceF += pitch; 5663 } 5664 } 5665 else ASSERT(false); 5666 } 5667 else 5668 { 5669 #define AVERAGE(x, y) (((x) & (y)) + ((((x) ^ (y)) >> 1) & 0x7BEF) + (((x) ^ (y)) & 0x0821)) 5670 5671 if(internal.depth == 2) 5672 { 5673 for(int y = 0; y < 
height; y++) 5674 { 5675 for(int x = 0; x < width; x++) 5676 { 5677 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5678 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5679 5680 c0 = AVERAGE(c0, c1); 5681 5682 *(unsigned short*)(source0 + 2 * x) = c0; 5683 } 5684 5685 source0 += pitch; 5686 source1 += pitch; 5687 } 5688 } 5689 else if(internal.depth == 4) 5690 { 5691 for(int y = 0; y < height; y++) 5692 { 5693 for(int x = 0; x < width; x++) 5694 { 5695 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5696 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5697 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5698 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5699 5700 c0 = AVERAGE(c0, c1); 5701 c2 = AVERAGE(c2, c3); 5702 c0 = AVERAGE(c0, c2); 5703 5704 *(unsigned short*)(source0 + 2 * x) = c0; 5705 } 5706 5707 source0 += pitch; 5708 source1 += pitch; 5709 source2 += pitch; 5710 source3 += pitch; 5711 } 5712 } 5713 else if(internal.depth == 8) 5714 { 5715 for(int y = 0; y < height; y++) 5716 { 5717 for(int x = 0; x < width; x++) 5718 { 5719 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5720 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5721 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5722 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5723 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5724 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5725 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5726 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5727 5728 c0 = AVERAGE(c0, c1); 5729 c2 = AVERAGE(c2, c3); 5730 c4 = AVERAGE(c4, c5); 5731 c6 = AVERAGE(c6, c7); 5732 c0 = AVERAGE(c0, c2); 5733 c4 = AVERAGE(c4, c6); 5734 c0 = AVERAGE(c0, c4); 5735 5736 *(unsigned short*)(source0 + 2 * x) = c0; 5737 } 5738 5739 source0 += pitch; 5740 source1 += pitch; 5741 source2 += pitch; 5742 source3 += pitch; 5743 source4 += pitch; 5744 source5 += pitch; 5745 
source6 += pitch; 5746 source7 += pitch; 5747 } 5748 } 5749 else if(internal.depth == 16) 5750 { 5751 for(int y = 0; y < height; y++) 5752 { 5753 for(int x = 0; x < width; x++) 5754 { 5755 unsigned short c0 = *(unsigned short*)(source0 + 2 * x); 5756 unsigned short c1 = *(unsigned short*)(source1 + 2 * x); 5757 unsigned short c2 = *(unsigned short*)(source2 + 2 * x); 5758 unsigned short c3 = *(unsigned short*)(source3 + 2 * x); 5759 unsigned short c4 = *(unsigned short*)(source4 + 2 * x); 5760 unsigned short c5 = *(unsigned short*)(source5 + 2 * x); 5761 unsigned short c6 = *(unsigned short*)(source6 + 2 * x); 5762 unsigned short c7 = *(unsigned short*)(source7 + 2 * x); 5763 unsigned short c8 = *(unsigned short*)(source8 + 2 * x); 5764 unsigned short c9 = *(unsigned short*)(source9 + 2 * x); 5765 unsigned short cA = *(unsigned short*)(sourceA + 2 * x); 5766 unsigned short cB = *(unsigned short*)(sourceB + 2 * x); 5767 unsigned short cC = *(unsigned short*)(sourceC + 2 * x); 5768 unsigned short cD = *(unsigned short*)(sourceD + 2 * x); 5769 unsigned short cE = *(unsigned short*)(sourceE + 2 * x); 5770 unsigned short cF = *(unsigned short*)(sourceF + 2 * x); 5771 5772 c0 = AVERAGE(c0, c1); 5773 c2 = AVERAGE(c2, c3); 5774 c4 = AVERAGE(c4, c5); 5775 c6 = AVERAGE(c6, c7); 5776 c8 = AVERAGE(c8, c9); 5777 cA = AVERAGE(cA, cB); 5778 cC = AVERAGE(cC, cD); 5779 cE = AVERAGE(cE, cF); 5780 c0 = AVERAGE(c0, c2); 5781 c4 = AVERAGE(c4, c6); 5782 c8 = AVERAGE(c8, cA); 5783 cC = AVERAGE(cC, cE); 5784 c0 = AVERAGE(c0, c4); 5785 c8 = AVERAGE(c8, cC); 5786 c0 = AVERAGE(c0, c8); 5787 5788 *(unsigned short*)(source0 + 2 * x) = c0; 5789 } 5790 5791 source0 += pitch; 5792 source1 += pitch; 5793 source2 += pitch; 5794 source3 += pitch; 5795 source4 += pitch; 5796 source5 += pitch; 5797 source6 += pitch; 5798 source7 += pitch; 5799 source8 += pitch; 5800 source9 += pitch; 5801 sourceA += pitch; 5802 sourceB += pitch; 5803 sourceC += pitch; 5804 sourceD += pitch; 5805 sourceE += pitch; 5806 
sourceF += pitch; 5807 } 5808 } 5809 else ASSERT(false); 5810 5811 #undef AVERAGE 5812 } 5813 } 5814 else 5815 { 5816 // UNIMPLEMENTED(); 5817 } 5818 } 5819} 5820