1/* 2 * Copyright 2014 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8#include "SkTextureCompressor_LATC.h" 9#include "SkTextureCompressor_Blitter.h" 10 11#include "SkBlitter.h" 12#include "SkEndian.h" 13 14// Compression options. In general, the slow version is much more accurate, but 15// much slower. The fast option is much faster, but much less accurate. YMMV. 16#define COMPRESS_LATC_SLOW 0 17#define COMPRESS_LATC_FAST 1 18 19//////////////////////////////////////////////////////////////////////////////// 20 21// Generates an LATC palette. LATC constructs 22// a palette of eight colors from LUM0 and LUM1 using the algorithm: 23// 24// LUM0, if lum0 > lum1 and code(x,y) == 0 25// LUM1, if lum0 > lum1 and code(x,y) == 1 26// (6*LUM0+ LUM1)/7, if lum0 > lum1 and code(x,y) == 2 27// (5*LUM0+2*LUM1)/7, if lum0 > lum1 and code(x,y) == 3 28// (4*LUM0+3*LUM1)/7, if lum0 > lum1 and code(x,y) == 4 29// (3*LUM0+4*LUM1)/7, if lum0 > lum1 and code(x,y) == 5 30// (2*LUM0+5*LUM1)/7, if lum0 > lum1 and code(x,y) == 6 31// ( LUM0+6*LUM1)/7, if lum0 > lum1 and code(x,y) == 7 32// 33// LUM0, if lum0 <= lum1 and code(x,y) == 0 34// LUM1, if lum0 <= lum1 and code(x,y) == 1 35// (4*LUM0+ LUM1)/5, if lum0 <= lum1 and code(x,y) == 2 36// (3*LUM0+2*LUM1)/5, if lum0 <= lum1 and code(x,y) == 3 37// (2*LUM0+3*LUM1)/5, if lum0 <= lum1 and code(x,y) == 4 38// ( LUM0+4*LUM1)/5, if lum0 <= lum1 and code(x,y) == 5 39// 0, if lum0 <= lum1 and code(x,y) == 6 40// 255, if lum0 <= lum1 and code(x,y) == 7 41 42static const int kLATCPaletteSize = 8; 43static void generate_latc_palette(uint8_t palette[], uint8_t lum0, uint8_t lum1) { 44 palette[0] = lum0; 45 palette[1] = lum1; 46 if (lum0 > lum1) { 47 for (int i = 1; i < 7; i++) { 48 palette[i+1] = ((7-i)*lum0 + i*lum1) / 7; 49 } 50 } else { 51 for (int i = 1; i < 5; i++) { 52 palette[i+1] = ((5-i)*lum0 + i*lum1) / 5; 53 } 54 palette[6] = 0; 55 palette[7] = 255; 56 } 
57} 58 59//////////////////////////////////////////////////////////////////////////////// 60 61#if COMPRESS_LATC_SLOW 62 63//////////////////////////////////////////////////////////////////////////////// 64// 65// Utility Functions 66// 67//////////////////////////////////////////////////////////////////////////////// 68 69// Absolute difference between two values. More correct than SkTAbs(a - b) 70// because it works on unsigned values. 71template <typename T> inline T abs_diff(const T &a, const T &b) { 72 return (a > b) ? (a - b) : (b - a); 73} 74 75static bool is_extremal(uint8_t pixel) { 76 return 0 == pixel || 255 == pixel; 77} 78 79typedef uint64_t (*A84x4To64BitProc)(const uint8_t block[]); 80 81// This function is used by both R11 EAC and LATC to compress 4x4 blocks 82// of 8-bit alpha into 64-bit values that comprise the compressed data. 83// For both formats, we need to make sure that the dimensions of the 84// src pixels are divisible by 4, and copy 4x4 blocks one at a time 85// for compression. 
86static bool compress_4x4_a8_to_64bit(uint8_t* dst, const uint8_t* src, 87 int width, int height, int rowBytes, 88 A84x4To64BitProc proc) { 89 // Make sure that our data is well-formed enough to be considered for compression 90 if (0 == width || 0 == height || (width % 4) != 0 || (height % 4) != 0) { 91 return false; 92 } 93 94 int blocksX = width >> 2; 95 int blocksY = height >> 2; 96 97 uint8_t block[16]; 98 uint64_t* encPtr = reinterpret_cast<uint64_t*>(dst); 99 for (int y = 0; y < blocksY; ++y) { 100 for (int x = 0; x < blocksX; ++x) { 101 // Load block 102 for (int k = 0; k < 4; ++k) { 103 memcpy(block + k*4, src + k*rowBytes + 4*x, 4); 104 } 105 106 // Compress it 107 *encPtr = proc(block); 108 ++encPtr; 109 } 110 src += 4 * rowBytes; 111 } 112 113 return true; 114} 115 116//////////////////////////////////////////////////////////////////////////////// 117// 118// LATC compressor 119// 120//////////////////////////////////////////////////////////////////////////////// 121 122// LATC compressed texels down into square 4x4 blocks 123static const int kLATCBlockSize = 4; 124static const int kLATCPixelsPerBlock = kLATCBlockSize * kLATCBlockSize; 125 126// Compress a block by using the bounding box of the pixels. It is assumed that 127// there are no extremal pixels in this block otherwise we would have used 128// compressBlockBBIgnoreExtremal. 
129static uint64_t compress_latc_block_bb(const uint8_t pixels[]) { 130 uint8_t minVal = 255; 131 uint8_t maxVal = 0; 132 for (int i = 0; i < kLATCPixelsPerBlock; ++i) { 133 minVal = SkTMin(pixels[i], minVal); 134 maxVal = SkTMax(pixels[i], maxVal); 135 } 136 137 SkASSERT(!is_extremal(minVal)); 138 SkASSERT(!is_extremal(maxVal)); 139 140 uint8_t palette[kLATCPaletteSize]; 141 generate_latc_palette(palette, maxVal, minVal); 142 143 uint64_t indices = 0; 144 for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) { 145 146 // Find the best palette index 147 uint8_t bestError = abs_diff(pixels[i], palette[0]); 148 uint8_t idx = 0; 149 for (int j = 1; j < kLATCPaletteSize; ++j) { 150 uint8_t error = abs_diff(pixels[i], palette[j]); 151 if (error < bestError) { 152 bestError = error; 153 idx = j; 154 } 155 } 156 157 indices <<= 3; 158 indices |= idx; 159 } 160 161 return 162 SkEndian_SwapLE64( 163 static_cast<uint64_t>(maxVal) | 164 (static_cast<uint64_t>(minVal) << 8) | 165 (indices << 16)); 166} 167 168// Compress a block by using the bounding box of the pixels without taking into 169// account the extremal values. The generated palette will contain extremal values 170// and fewer points along the line segment to interpolate. 
// Precondition (asserted): the block contains at least one non-extremal pixel,
// so the bounding box below is well-defined.
static uint64_t compress_latc_block_bb_ignore_extremal(const uint8_t pixels[]) {
    uint8_t minVal = 255;
    uint8_t maxVal = 0;
    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
        // Extremal pixels are excluded from the bounding box; they will be
        // encoded exactly via the reserved palette slots below.
        if (is_extremal(pixels[i])) {
            continue;
        }

        minVal = SkTMin(pixels[i], minVal);
        maxVal = SkTMax(pixels[i], maxVal);
    }

    SkASSERT(!is_extremal(minVal));
    SkASSERT(!is_extremal(maxVal));

    // Passing (min, max) with min <= max selects the six-point palette, whose
    // slots 6 and 7 are hard-wired to 0 and 255 (see generate_latc_palette).
    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, minVal, maxVal);

    // Pack the 3-bit indices, iterating from the last pixel so that pixel 0
    // ends up in the least-significant bits.
    uint64_t indices = 0;
    for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {

        // Find the best palette index
        uint8_t idx = 0;
        if (is_extremal(pixels[i])) {
            // 0 and 255 are represented exactly by the reserved slots.
            if (0xFF == pixels[i]) {
                idx = 7;
            } else if (0 == pixels[i]) {
                idx = 6;
            } else {
                SkFAIL("Pixel is extremal but not really?!");
            }
        } else {
            // Search only the interpolated entries (0..5); the reserved
            // extremal slots were handled in the branch above, hence the
            // kLATCPaletteSize - 2 bound.
            uint8_t bestError = abs_diff(pixels[i], palette[0]);
            for (int j = 1; j < kLATCPaletteSize - 2; ++j) {
                uint8_t error = abs_diff(pixels[i], palette[j]);
                if (error < bestError) {
                    bestError = error;
                    idx = j;
                }
            }
        }

        indices <<= 3;
        indices |= idx;
    }

    // Little-endian block layout: LUM0, LUM1, then 48 bits of indices.
    return
        SkEndian_SwapLE64(
            static_cast<uint64_t>(minVal) |
            (static_cast<uint64_t>(maxVal) << 8) |
            (indices << 16));
}


// Compress LATC block. Each 4x4 block of pixels is decompressed by LATC from two
// values LUM0 and LUM1, and an index into the generated palette. Details of how
// the palette is generated can be found in the comments of generatePalette above.
//
// We choose which palette type to use based on whether or not 'pixels' contains
// any extremal values (0 or 255). If there are extremal values, then we use the
// palette that has the extremal values built in. Otherwise, we use the full bounding
// box.

// Dispatches to the cheapest correct encoding for this block: trivial
// single-value and two-value encodings first, then one of the two
// bounding-box palette modes.
static uint64_t compress_latc_block(const uint8_t pixels[]) {
    // Collect unique pixels
    int nUniquePixels = 0;
    uint8_t uniquePixels[kLATCPixelsPerBlock];
    for (int i = 0; i < kLATCPixelsPerBlock; ++i) {
        bool foundPixel = false;
        for (int j = 0; j < nUniquePixels; ++j) {
            foundPixel = foundPixel || uniquePixels[j] == pixels[i];
        }

        if (!foundPixel) {
            uniquePixels[nUniquePixels] = pixels[i];
            ++nUniquePixels;
        }
    }

    // If there's only one unique pixel, then our compression is easy.
    // Both endpoints are that value and all indices are zero.
    if (1 == nUniquePixels) {
        return SkEndian_SwapLE64(pixels[0] | (pixels[0] << 8));

    // Similarly, if there are only two unique pixels, then our compression is
    // easy again: place the pixels in the block header, and assign the indices
    // with one or zero depending on which pixel they belong to.
    } else if (2 == nUniquePixels) {
        uint64_t outBlock = 0;
        for (int i = kLATCPixelsPerBlock - 1; i >= 0; --i) {
            int idx = 0;
            if (pixels[i] == uniquePixels[1]) {
                idx = 1;
            }

            outBlock <<= 3;
            outBlock |= idx;
        }
        outBlock <<= 16;
        outBlock |= (uniquePixels[0] | (uniquePixels[1] << 8));
        return SkEndian_SwapLE64(outBlock);
    }

    // Count non-maximal pixel values
    int nonExtremalPixels = 0;
    for (int i = 0; i < nUniquePixels; ++i) {
        if (!is_extremal(uniquePixels[i])) {
            ++nonExtremalPixels;
        }
    }

    // If all the pixels are nonmaximal then compute the palette using
    // the bounding box of all the pixels.
    if (nonExtremalPixels == nUniquePixels) {
        // This is really just for correctness, in all of my tests we
        // never take this step. We don't lose too much perf here because
        // most of the processing in this function is worth it for the
        // 1 == nUniquePixels optimization.
        return compress_latc_block_bb(pixels);
    } else {
        return compress_latc_block_bb_ignore_extremal(pixels);
    }
}

#endif  // COMPRESS_LATC_SLOW

////////////////////////////////////////////////////////////////////////////////

#if COMPRESS_LATC_FAST

// Take the top three bits of each index and pack them into the low 12
// bits of the integer.
static inline uint32_t pack_index(uint32_t x) {
    // Pack it in...
#if defined (SK_CPU_BENDIAN)
    return
        (x >> 24) |
        ((x >> 13) & 0x38) |
        ((x >> 2) & 0x1C0) |
        ((x << 9) & 0xE00);
#else
    return
        (x & 0x7) |
        ((x >> 5) & 0x38) |
        ((x >> 10) & 0x1C0) |
        ((x >> 15) & 0xE00);
#endif
}

// Converts each 8-bit byte in the integer into an LATC index, and then packs
// the indices into the low 12 bits of the integer.
static inline uint32_t convert_index(uint32_t x) {
    // Since the palette is
    // 255, 0, 219, 182, 146, 109, 73, 36
    // we need to map the high three bits of each byte in the integer
    // from
    // 0 1 2 3 4 5 6 7
    // to
    // 1 7 6 5 4 3 2 0
    //
    // This first operation takes the mapping from
    // 0 1 2 3 4 5 6 7  -->  7 6 5 4 3 2 1 0
    x = 0x07070707 - ((x >> 5) & 0x07070707);

    // mask is 1 if index is non-zero
    const uint32_t mask = (x | (x >> 1) | (x >> 2)) & 0x01010101;

    // add mask:
    // 7 6 5 4 3 2 1 0  -->  8 7 6 5 4 3 2 0
    x = (x + mask);

    // Handle overflow:
    // 8 7 6 5 4 3 2 0  -->  9 7 6 5 4 3 2 0
    x |= (x >> 3) & 0x01010101;

    // Mask out high bits:
    // 9 7 6 5 4 3 2 0  -->  1 7 6 5 4 3 2 0
    x &= 0x07070707;

    return pack_index(x);
}

typedef uint64_t (*PackIndicesProc)(const uint8_t* alpha, int rowBytes);

// Writes one 8-byte LATC block for the 4x4 alpha block at 'src' and advances
// *dstPtr past it. The header is hard-coded to LUM0 = 0xFF, LUM1 = 0x00,
// which corresponds to the palette described in convert_index above; only the
// 48 index bits vary per block.
template<PackIndicesProc packIndicesProc>
static void compress_a8_latc_block(uint8_t** dstPtr, const uint8_t* src, int rowBytes) {
    *(reinterpret_cast<uint64_t*>(*dstPtr)) =
        SkEndian_SwapLE64(0xFF | (packIndicesProc(src, rowBytes) << 16));
    *dstPtr += 8;
}

// Packs a row-major 4x4 block (rows 'rowBytes' apart) into 48 bits of LATC
// indices, 12 bits per row.
// NOTE(review): each row is read through a reinterpret_cast to uint32_t,
// which presumably assumes the rows are safely readable as 32-bit loads on
// the targets Skia supports — confirm alignment expectations with callers.
inline uint64_t PackRowMajor(const uint8_t *indices, int rowBytes) {
    uint64_t result = 0;
    for (int i = 0; i < 4; ++i) {
        const uint32_t idx = *(reinterpret_cast<const uint32_t*>(indices + i*rowBytes));
        result |= static_cast<uint64_t>(convert_index(idx)) << 12*i;
    }
    return result;
}

// Packs a column-major 4x4 block by transposing it into a temporary buffer
// and delegating to PackRowMajor.
inline uint64_t PackColumnMajor(const uint8_t *indices, int rowBytes) {
    // !SPEED! Blarg, this is kind of annoying. SSE4 can make this
    // a LOT faster.
    uint8_t transposed[16];
    for (int i = 0; i < 4; ++i) {
        for (int j = 0; j < 4; ++j) {
            transposed[j*4+i] = indices[i*rowBytes + j];
        }
    }

    return PackRowMajor(transposed, 4);
}

// Fast-path whole-image compressor. Rejects negative or non-multiple-of-4
// dimensions; zero dimensions pass the check and simply produce no output.
static bool compress_4x4_a8_latc(uint8_t* dst, const uint8_t* src,
                                 int width, int height, int rowBytes) {

    if (width < 0 || ((width % 4) != 0) || height < 0 || ((height % 4) != 0)) {
        return false;
    }

    uint8_t** dstPtr = &dst;
    for (int y = 0; y < height; y += 4) {
        for (int x = 0; x < width; x += 4) {
            compress_a8_latc_block<PackRowMajor>(dstPtr, src + y*rowBytes + x, rowBytes);
        }
    }

    return true;
}

// Compresses a column-major 4x4 block of alpha values into the 8-byte LATC
// block at dst.
void CompressA8LATCBlockVertical(uint8_t* dst, const uint8_t block[]) {
    compress_a8_latc_block<PackColumnMajor>(&dst, block, 4);
}

#endif  // COMPRESS_LATC_FAST

// Decompresses one 8-byte LATC block at 'src' into a 4x4 region of 8-bit
// values at 'dst', with rows 'dstRowBytes' apart. Indices are consumed from
// the low bits upward, so pixel (0,0) comes from the lowest 3 index bits.
void decompress_latc_block(uint8_t* dst, int dstRowBytes, const uint8_t* src) {
    uint64_t block = SkEndian_SwapLE64(*(reinterpret_cast<const uint64_t *>(src)));
    uint8_t lum0 = block & 0xFF;
    uint8_t lum1 = (block >> 8) & 0xFF;

    uint8_t palette[kLATCPaletteSize];
    generate_latc_palette(palette, lum0, lum1);

    block >>= 16;
    for (int j = 0; j < 4; ++j) {
        for (int i = 0; i < 4; ++i) {
            dst[i] = palette[block & 0x7];
            block >>= 3;
        }
        dst += dstRowBytes;
    }
}

// This is the type passed as the CompressorType argument of the compressed
424// blitter for the LATC format. The static functions required to be in this 425// struct are documented in SkTextureCompressor_Blitter.h 426struct CompressorLATC { 427 static inline void CompressA8Vertical(uint8_t* dst, const uint8_t block[]) { 428 compress_a8_latc_block<PackColumnMajor>(&dst, block, 4); 429 } 430 431 static inline void CompressA8Horizontal(uint8_t* dst, const uint8_t* src, 432 int srcRowBytes) { 433 compress_a8_latc_block<PackRowMajor>(&dst, src, srcRowBytes); 434 } 435 436#if PEDANTIC_BLIT_RECT 437 static inline void UpdateBlock(uint8_t* dst, const uint8_t* src, int srcRowBytes, 438 const uint8_t* mask) { 439 // Pack the mask 440 uint64_t cmpMask = 0; 441 for (int i = 0; i < 4; ++i) { 442 const uint32_t idx = *(reinterpret_cast<const uint32_t*>(src + i*srcRowBytes)); 443 cmpMask |= static_cast<uint64_t>(pack_index(idx)) << 12*i; 444 } 445 cmpMask = SkEndian_SwapLE64(cmpMask << 16); // avoid header 446 447 uint64_t cmpSrc; 448 uint8_t *cmpSrcPtr = reinterpret_cast<uint8_t*>(&cmpSrc); 449 compress_a8_latc_block<PackRowMajor>(&cmpSrcPtr, src, srcRowBytes); 450 451 // Mask out header 452 cmpSrc = cmpSrc & cmpMask; 453 454 // Read destination encoding 455 uint64_t *cmpDst = reinterpret_cast<uint64_t*>(dst); 456 457 // If the destination is the encoding for a blank block, then we need 458 // to properly set the header 459 if (0 == cmpDst) { 460 *cmpDst = SkTEndian_SwapLE64(0x24924924924900FFULL); 461 } 462 463 // Set the new indices 464 *cmpDst &= ~cmpMask; 465 *cmpDst |= cmpSrc; 466 } 467#endif // PEDANTIC_BLIT_RECT 468}; 469 470//////////////////////////////////////////////////////////////////////////////// 471 472namespace SkTextureCompressor { 473 474bool CompressA8ToLATC(uint8_t* dst, const uint8_t* src, int width, int height, int rowBytes) { 475#if COMPRESS_LATC_FAST 476 return compress_4x4_a8_latc(dst, src, width, height, rowBytes); 477#elif COMPRESS_LATC_SLOW 478 return compress_4x4_a8_to_64bit(dst, src, width, height, rowBytes, 
compress_latc_block); 479#else 480#error "Must choose either fast or slow LATC compression" 481#endif 482} 483 484SkBlitter* CreateLATCBlitter(int width, int height, void* outputBuffer, 485 SkTBlitterAllocator* allocator) { 486 if ((width % 4) != 0 || (height % 4) != 0) { 487 return NULL; 488 } 489 490#if COMPRESS_LATC_FAST 491 // Memset the output buffer to an encoding that decodes to zero. We must do this 492 // in order to avoid having uninitialized values in the buffer if the blitter 493 // decides not to write certain scanlines (and skip entire rows of blocks). 494 // In the case of LATC, if everything is zero, then LUM0 and LUM1 are also zero, 495 // and they will only be non-zero (0xFF) if the index is 7. So bzero will do just fine. 496 // (8 bytes per block) * (w * h / 16 blocks) = w * h / 2 497 sk_bzero(outputBuffer, width * height / 2); 498 499 return allocator->createT< 500 SkTCompressedAlphaBlitter<4, 8, CompressorLATC>, int, int, void* > 501 (width, height, outputBuffer); 502#elif COMPRESS_LATC_SLOW 503 // TODO (krajcevski) 504 return NULL; 505#endif 506} 507 508void DecompressLATC(uint8_t* dst, int dstRowBytes, const uint8_t* src, int width, int height) { 509 for (int j = 0; j < height; j += 4) { 510 for (int i = 0; i < width; i += 4) { 511 decompress_latc_block(dst + i, dstRowBytes, src); 512 src += 8; 513 } 514 dst += 4 * dstRowBytes; 515 } 516} 517 518} // SkTextureCompressor 519