lossless.c revision 4b2196c929b70f2cdc1c2556580d349db89356d8
1// Copyright 2012 Google Inc. All Rights Reserved. 2// 3// This code is licensed under the same terms as WebM: 4// Software License Agreement: http://www.webmproject.org/license/software/ 5// Additional IP Rights Grant: http://www.webmproject.org/license/additional/ 6// ----------------------------------------------------------------------------- 7// 8// Image transforms and color space conversion methods for lossless decoder. 9// 10// Authors: Vikas Arora (vikaas.arora@gmail.com) 11// Jyrki Alakuijala (jyrki@google.com) 12// Urvang Joshi (urvang@google.com) 13 14#if defined(__cplusplus) || defined(c_plusplus) 15extern "C" { 16#endif 17 18#include <math.h> 19#include <stdlib.h> 20#include "./lossless.h" 21#include "../dec/vp8li.h" 22#include "../dsp/yuv.h" 23#include "../dsp/dsp.h" 24 25#define MAX_DIFF_COST (1e30f) 26 27// lookup table for small values of log2(int) 28#define APPROX_LOG_MAX 4096 29#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086 30#define LOG_LOOKUP_IDX_MAX 256 31static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = { 32 0.0000000000000000f, 0.0000000000000000f, 33 1.0000000000000000f, 1.5849625007211560f, 34 2.0000000000000000f, 2.3219280948873621f, 35 2.5849625007211560f, 2.8073549220576041f, 36 3.0000000000000000f, 3.1699250014423121f, 37 3.3219280948873621f, 3.4594316186372973f, 38 3.5849625007211560f, 3.7004397181410921f, 39 3.8073549220576041f, 3.9068905956085187f, 40 4.0000000000000000f, 4.0874628412503390f, 41 4.1699250014423121f, 4.2479275134435852f, 42 4.3219280948873626f, 4.3923174227787606f, 43 4.4594316186372973f, 4.5235619560570130f, 44 4.5849625007211560f, 4.6438561897747243f, 45 4.7004397181410917f, 4.7548875021634682f, 46 4.8073549220576037f, 4.8579809951275718f, 47 4.9068905956085187f, 4.9541963103868749f, 48 5.0000000000000000f, 5.0443941193584533f, 49 5.0874628412503390f, 5.1292830169449663f, 50 5.1699250014423121f, 5.2094533656289501f, 51 5.2479275134435852f, 5.2854022188622487f, 52 5.3219280948873626f, 5.3575520046180837f, 53 5.3923174227787606f, 5.4262647547020979f, 54 5.4594316186372973f, 5.4918530963296747f, 55 5.5235619560570130f, 5.5545888516776376f, 56 5.5849625007211560f, 5.6147098441152083f, 57 5.6438561897747243f, 5.6724253419714951f, 58 5.7004397181410917f, 5.7279204545631987f, 59 5.7548875021634682f, 5.7813597135246599f, 60 5.8073549220576037f, 5.8328900141647412f, 61 5.8579809951275718f, 5.8826430493618415f, 62 5.9068905956085187f, 5.9307373375628866f, 63 5.9541963103868749f, 5.9772799234999167f, 64 6.0000000000000000f, 6.0223678130284543f, 65 6.0443941193584533f, 6.0660891904577720f, 66 6.0874628412503390f, 6.1085244567781691f, 67 6.1292830169449663f, 6.1497471195046822f, 68 6.1699250014423121f, 6.1898245588800175f, 69 6.2094533656289501f, 6.2288186904958804f, 70 6.2479275134435852f, 6.2667865406949010f, 71 6.2854022188622487f, 6.3037807481771030f, 72 6.3219280948873626f, 6.3398500028846243f, 73 6.3575520046180837f, 6.3750394313469245f, 74 6.3923174227787606f, 6.4093909361377017f, 75 6.4262647547020979f, 6.4429434958487279f, 76 6.4594316186372973f, 6.4757334309663976f, 77 6.4918530963296747f, 6.5077946401986963f, 78 6.5235619560570130f, 6.5391588111080309f, 79 6.5545888516776376f, 6.5698556083309478f, 80 6.5849625007211560f, 6.5999128421871278f, 81 6.6147098441152083f, 6.6293566200796094f, 82 6.6438561897747243f, 6.6582114827517946f, 83 6.6724253419714951f, 6.6865005271832185f, 84 6.7004397181410917f, 6.7142455176661224f, 85 6.7279204545631987f, 6.7414669864011464f, 86 6.7548875021634682f, 6.7681843247769259f, 87 6.7813597135246599f, 6.7944158663501061f, 88 6.8073549220576037f, 6.8201789624151878f, 89 6.8328900141647412f, 6.8454900509443747f, 90 6.8579809951275718f, 6.8703647195834047f, 91 6.8826430493618415f, 6.8948177633079437f, 92 6.9068905956085187f, 6.9188632372745946f, 93 6.9307373375628866f, 6.9425145053392398f, 94 6.9541963103868749f, 6.9657842846620869f, 95 6.9772799234999167f, 6.9886846867721654f, 96 7.0000000000000000f, 7.0112272554232539f, 97 7.0223678130284543f, 7.0334230015374501f, 98 7.0443941193584533f, 7.0552824355011898f, 99 7.0660891904577720f, 7.0768155970508308f, 100 7.0874628412503390f, 7.0980320829605263f, 101 7.1085244567781691f, 7.1189410727235076f, 102 7.1292830169449663f, 7.1395513523987936f, 103 7.1497471195046822f, 7.1598713367783890f, 104 7.1699250014423121f, 7.1799090900149344f, 105 7.1898245588800175f, 7.1996723448363644f, 106 7.2094533656289501f, 7.2191685204621611f, 107 7.2288186904958804f, 7.2384047393250785f, 108 7.2479275134435852f, 7.2573878426926521f, 109 7.2667865406949010f, 7.2761244052742375f, 110 7.2854022188622487f, 7.2946207488916270f, 111 7.3037807481771030f, 7.3128829552843557f, 112 7.3219280948873626f, 7.3309168781146167f, 113 7.3398500028846243f, 7.3487281542310771f, 114 7.3575520046180837f, 7.3663222142458160f, 115 7.3750394313469245f, 7.3837042924740519f, 116 7.3923174227787606f, 7.4008794362821843f, 117 7.4093909361377017f, 7.4178525148858982f, 118 7.4262647547020979f, 7.4346282276367245f, 119 7.4429434958487279f, 7.4512111118323289f, 120 7.4594316186372973f, 7.4676055500829976f, 121 7.4757334309663976f, 7.4838157772642563f, 122 7.4918530963296747f, 7.4998458870832056f, 123 7.5077946401986963f, 7.5156998382840427f, 124 7.5235619560570130f, 7.5313814605163118f, 125 7.5391588111080309f, 7.5468944598876364f, 126 7.5545888516776376f, 7.5622424242210728f, 127 7.5698556083309478f, 7.5774288280357486f, 128 7.5849625007211560f, 7.5924570372680806f, 129 7.5999128421871278f, 7.6073303137496104f, 130 7.6147098441152083f, 7.6220518194563764f, 131 7.6293566200796094f, 7.6366246205436487f, 132 7.6438561897747243f, 7.6510516911789281f, 133 7.6582114827517946f, 7.6653359171851764f, 134 7.6724253419714951f, 7.6794800995054464f, 135 7.6865005271832185f, 7.6934869574993252f, 136 7.7004397181410917f, 7.7073591320808825f, 137 7.7142455176661224f, 7.7210991887071855f, 138 7.7279204545631987f, 7.7347096202258383f, 139 7.7414669864011464f, 7.7481928495894605f, 140 7.7548875021634682f, 7.7615512324444795f, 141 7.7681843247769259f, 7.7747870596011736f, 142 7.7813597135246599f, 7.7879025593914317f, 143 7.7944158663501061f, 7.8008998999203047f, 144 7.8073549220576037f, 7.8137811912170374f, 145 7.8201789624151878f, 7.8265484872909150f, 146 7.8328900141647412f, 7.8392037880969436f, 147 7.8454900509443747f, 7.8517490414160571f, 148 7.8579809951275718f, 7.8641861446542797f, 149 7.8703647195834047f, 7.8765169465649993f, 150 7.8826430493618415f, 7.8887432488982591f, 151 7.8948177633079437f, 7.9008668079807486f, 152 7.9068905956085187f, 7.9128893362299619f, 153 7.9188632372745946f, 7.9248125036057812f, 154 7.9307373375628866f, 7.9366379390025709f, 155 7.9425145053392398f, 7.9483672315846778f, 156 7.9541963103868749f, 7.9600019320680805f, 157 7.9657842846620869f, 7.9715435539507719f, 158 7.9772799234999167f, 7.9829935746943103f, 159 7.9886846867721654f, 7.9943534368588577f 160}; 161 162float VP8LFastLog2(int v) { 163 if (v < LOG_LOOKUP_IDX_MAX) { 164 return kLog2Table[v]; 165 } else if (v < APPROX_LOG_MAX) { 166 int log_cnt = 0; 167 while (v >= LOG_LOOKUP_IDX_MAX) { 168 ++log_cnt; 169 v = v >> 1; 170 } 171 return kLog2Table[v] + (float)log_cnt; 172 } else { 173 return (float)(LOG_2_RECIPROCAL * log((double)v)); 174 } 175} 176 177//------------------------------------------------------------------------------ 178// Image transforms. 179 180// In-place sum of each component with mod 256. 181static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) { 182 const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u); 183 const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu); 184 *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu); 185} 186 187static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) { 188 return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1); 189} 190 191static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) { 192 return Average2(Average2(a0, a2), a1); 193} 194 195static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1, 196 uint32_t a2, uint32_t a3) { 197 return Average2(Average2(a0, a1), Average2(a2, a3)); 198} 199 200static WEBP_INLINE uint32_t Clip255(uint32_t a) { 201 if (a < 256) { 202 return a; 203 } 204 // return 0, when a is a negative integer. 205 // return 255, when a is positive. 206 return ~a >> 24; 207} 208 209static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) { 210 return Clip255(a + b - c); 211} 212 213static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1, 214 uint32_t c2) { 215 const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24); 216 const int r = AddSubtractComponentFull((c0 >> 16) & 0xff, 217 (c1 >> 16) & 0xff, 218 (c2 >> 16) & 0xff); 219 const int g = AddSubtractComponentFull((c0 >> 8) & 0xff, 220 (c1 >> 8) & 0xff, 221 (c2 >> 8) & 0xff); 222 const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff); 223 return (a << 24) | (r << 16) | (g << 8) | b; 224} 225 226static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) { 227 return Clip255(a + (a - b) / 2); 228} 229 230static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1, 231 uint32_t c2) { 232 const uint32_t ave = Average2(c0, c1); 233 const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24); 234 const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff); 235 const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff); 236 const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff); 237 return (a << 24) | (r << 16) | (g << 8) | b; 238} 239 240static WEBP_INLINE int Sub3(int a, int b, int c) { 241 const int pa = b - c; 242 const int pb = a - c; 243 return abs(pa) - abs(pb); 244} 245 246static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) { 247 const int pa_minus_pb = 248 Sub3((a >> 24) , (b >> 24) , (c >> 24) ) + 249 Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) + 250 Sub3((a >> 8) & 0xff, (b >> 8) & 0xff, (c >> 8) & 0xff) + 251 Sub3((a ) & 0xff, (b ) & 0xff, (c ) & 0xff); 252 253 return (pa_minus_pb <= 0) ? a : b; 254} 255 256//------------------------------------------------------------------------------ 257// Predictors 258 259static uint32_t Predictor0(uint32_t left, const uint32_t* const top) { 260 (void)top; 261 (void)left; 262 return ARGB_BLACK; 263} 264static uint32_t Predictor1(uint32_t left, const uint32_t* const top) { 265 (void)top; 266 return left; 267} 268static uint32_t Predictor2(uint32_t left, const uint32_t* const top) { 269 (void)left; 270 return top[0]; 271} 272static uint32_t Predictor3(uint32_t left, const uint32_t* const top) { 273 (void)left; 274 return top[1]; 275} 276static uint32_t Predictor4(uint32_t left, const uint32_t* const top) { 277 (void)left; 278 return top[-1]; 279} 280static uint32_t Predictor5(uint32_t left, const uint32_t* const top) { 281 const uint32_t pred = Average3(left, top[0], top[1]); 282 return pred; 283} 284static uint32_t Predictor6(uint32_t left, const uint32_t* const top) { 285 const uint32_t pred = Average2(left, top[-1]); 286 return pred; 287} 288static uint32_t Predictor7(uint32_t left, const uint32_t* const top) { 289 const uint32_t pred = Average2(left, top[0]); 290 return pred; 291} 292static uint32_t Predictor8(uint32_t left, const uint32_t* const top) { 293 const uint32_t pred = Average2(top[-1], top[0]); 294 (void)left; 295 return pred; 296} 297static uint32_t Predictor9(uint32_t left, const uint32_t* const top) { 298 const uint32_t pred = Average2(top[0], top[1]); 299 (void)left; 300 return pred; 301} 302static uint32_t Predictor10(uint32_t left, const uint32_t* const top) { 303 const uint32_t pred = Average4(left, top[-1], top[0], top[1]); 304 return pred; 305} 306static uint32_t Predictor11(uint32_t left, const uint32_t* const top) { 307 const uint32_t pred = Select(top[0], left, top[-1]); 308 return pred; 309} 310static uint32_t Predictor12(uint32_t left, const uint32_t* const top) { 311 const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]); 312 return pred; 313} 314static uint32_t Predictor13(uint32_t left, const uint32_t* const top) { 315 const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]); 316 return pred; 317} 318 319typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top); 320static const PredictorFunc kPredictors[16] = { 321 Predictor0, Predictor1, Predictor2, Predictor3, 322 Predictor4, Predictor5, Predictor6, Predictor7, 323 Predictor8, Predictor9, Predictor10, Predictor11, 324 Predictor12, Predictor13, 325 Predictor0, Predictor0 // <- padding security sentinels 326}; 327 328// TODO(vikasa): Replace 256 etc with defines. 329static float PredictionCostSpatial(const int* counts, 330 int weight_0, double exp_val) { 331 const int significant_symbols = 16; 332 const double exp_decay_factor = 0.6; 333 double bits = weight_0 * counts[0]; 334 int i; 335 for (i = 1; i < significant_symbols; ++i) { 336 bits += exp_val * (counts[i] + counts[256 - i]); 337 exp_val *= exp_decay_factor; 338 } 339 return (float)(-0.1 * bits); 340} 341 342// Compute the Shanon's entropy: Sum(p*log2(p)) 343static float ShannonEntropy(const int* const array, int n) { 344 int i; 345 float retval = 0.f; 346 int sum = 0; 347 for (i = 0; i < n; ++i) { 348 if (array[i] != 0) { 349 sum += array[i]; 350 retval -= VP8LFastSLog2(array[i]); 351 } 352 } 353 retval += VP8LFastSLog2(sum); 354 return retval; 355} 356 357static float PredictionCostSpatialHistogram(int accumulated[4][256], 358 int tile[4][256]) { 359 int i; 360 int k; 361 int combo[256]; 362 double retval = 0; 363 for (i = 0; i < 4; ++i) { 364 const double exp_val = 0.94; 365 retval += PredictionCostSpatial(&tile[i][0], 1, exp_val); 366 retval += ShannonEntropy(&tile[i][0], 256); 367 for (k = 0; k < 256; ++k) { 368 combo[k] = accumulated[i][k] + tile[i][k]; 369 } 370 retval += ShannonEntropy(&combo[0], 256); 371 } 372 return (float)retval; 373} 374 375static int GetBestPredictorForTile(int width, int height, 376 int tile_x, int tile_y, int bits, 377 int accumulated[4][256], 378 const uint32_t* const argb_scratch) { 379 const int kNumPredModes = 14; 380 const int col_start = tile_x << bits; 381 const int row_start = tile_y << bits; 382 const int tile_size = 1 << bits; 383 const int ymax = (tile_size <= height - row_start) ? 384 tile_size : height - row_start; 385 const int xmax = (tile_size <= width - col_start) ? 386 tile_size : width - col_start; 387 int histo[4][256]; 388 float best_diff = MAX_DIFF_COST; 389 int best_mode = 0; 390 391 int mode; 392 for (mode = 0; mode < kNumPredModes; ++mode) { 393 const uint32_t* current_row = argb_scratch; 394 const PredictorFunc pred_func = kPredictors[mode]; 395 float cur_diff; 396 int y; 397 memset(&histo[0][0], 0, sizeof(histo)); 398 for (y = 0; y < ymax; ++y) { 399 int x; 400 const int row = row_start + y; 401 const uint32_t* const upper_row = current_row; 402 current_row = upper_row + width; 403 for (x = 0; x < xmax; ++x) { 404 const int col = col_start + x; 405 uint32_t predict; 406 uint32_t predict_diff; 407 if (row == 0) { 408 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. 409 } else if (col == 0) { 410 predict = upper_row[col]; // Top. 411 } else { 412 predict = pred_func(current_row[col - 1], upper_row + col); 413 } 414 predict_diff = VP8LSubPixels(current_row[col], predict); 415 ++histo[0][predict_diff >> 24]; 416 ++histo[1][((predict_diff >> 16) & 0xff)]; 417 ++histo[2][((predict_diff >> 8) & 0xff)]; 418 ++histo[3][(predict_diff & 0xff)]; 419 } 420 } 421 cur_diff = PredictionCostSpatialHistogram(accumulated, histo); 422 if (cur_diff < best_diff) { 423 best_diff = cur_diff; 424 best_mode = mode; 425 } 426 } 427 428 return best_mode; 429} 430 431static void CopyTileWithPrediction(int width, int height, 432 int tile_x, int tile_y, int bits, int mode, 433 const uint32_t* const argb_scratch, 434 uint32_t* const argb) { 435 const int col_start = tile_x << bits; 436 const int row_start = tile_y << bits; 437 const int tile_size = 1 << bits; 438 const int ymax = (tile_size <= height - row_start) ? 439 tile_size : height - row_start; 440 const int xmax = (tile_size <= width - col_start) ? 441 tile_size : width - col_start; 442 const PredictorFunc pred_func = kPredictors[mode]; 443 const uint32_t* current_row = argb_scratch; 444 445 int y; 446 for (y = 0; y < ymax; ++y) { 447 int x; 448 const int row = row_start + y; 449 const uint32_t* const upper_row = current_row; 450 current_row = upper_row + width; 451 for (x = 0; x < xmax; ++x) { 452 const int col = col_start + x; 453 const int pix = row * width + col; 454 uint32_t predict; 455 if (row == 0) { 456 predict = (col == 0) ? ARGB_BLACK : current_row[col - 1]; // Left. 457 } else if (col == 0) { 458 predict = upper_row[col]; // Top. 459 } else { 460 predict = pred_func(current_row[col - 1], upper_row + col); 461 } 462 argb[pix] = VP8LSubPixels(current_row[col], predict); 463 } 464 } 465} 466 467void VP8LResidualImage(int width, int height, int bits, 468 uint32_t* const argb, uint32_t* const argb_scratch, 469 uint32_t* const image) { 470 const int max_tile_size = 1 << bits; 471 const int tiles_per_row = VP8LSubSampleSize(width, bits); 472 const int tiles_per_col = VP8LSubSampleSize(height, bits); 473 uint32_t* const upper_row = argb_scratch; 474 uint32_t* const current_tile_rows = argb_scratch + width; 475 int tile_y; 476 int histo[4][256]; 477 memset(histo, 0, sizeof(histo)); 478 for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) { 479 const int tile_y_offset = tile_y * max_tile_size; 480 const int this_tile_height = 481 (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset; 482 int tile_x; 483 if (tile_y > 0) { 484 memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width, 485 width * sizeof(*upper_row)); 486 } 487 memcpy(current_tile_rows, &argb[tile_y_offset * width], 488 this_tile_height * width * sizeof(*current_tile_rows)); 489 for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) { 490 int pred; 491 int y; 492 const int tile_x_offset = tile_x * max_tile_size; 493 int all_x_max = tile_x_offset + max_tile_size; 494 if (all_x_max > width) { 495 all_x_max = width; 496 } 497 pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo, 498 argb_scratch); 499 image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8); 500 CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred, 501 argb_scratch, argb); 502 for (y = 0; y < max_tile_size; ++y) { 503 int ix; 504 int all_x; 505 int all_y = tile_y_offset + y; 506 if (all_y >= height) { 507 break; 508 } 509 ix = all_y * width + tile_x_offset; 510 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 511 const uint32_t a = argb[ix]; 512 ++histo[0][a >> 24]; 513 ++histo[1][((a >> 16) & 0xff)]; 514 ++histo[2][((a >> 8) & 0xff)]; 515 ++histo[3][(a & 0xff)]; 516 } 517 } 518 } 519 } 520} 521 522// Inverse prediction. 523static void PredictorInverseTransform(const VP8LTransform* const transform, 524 int y_start, int y_end, uint32_t* data) { 525 const int width = transform->xsize_; 526 if (y_start == 0) { // First Row follows the L (mode=1) mode. 527 int x; 528 const uint32_t pred0 = Predictor0(data[-1], NULL); 529 AddPixelsEq(data, pred0); 530 for (x = 1; x < width; ++x) { 531 const uint32_t pred1 = Predictor1(data[x - 1], NULL); 532 AddPixelsEq(data + x, pred1); 533 } 534 data += width; 535 ++y_start; 536 } 537 538 { 539 int y = y_start; 540 const int mask = (1 << transform->bits_) - 1; 541 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); 542 const uint32_t* pred_mode_base = 543 transform->data_ + (y >> transform->bits_) * tiles_per_row; 544 545 while (y < y_end) { 546 int x; 547 const uint32_t pred2 = Predictor2(data[-1], data - width); 548 const uint32_t* pred_mode_src = pred_mode_base; 549 PredictorFunc pred_func; 550 551 // First pixel follows the T (mode=2) mode. 552 AddPixelsEq(data, pred2); 553 554 // .. the rest: 555 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf]; 556 for (x = 1; x < width; ++x) { 557 uint32_t pred; 558 if ((x & mask) == 0) { // start of tile. Read predictor function. 559 pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf]; 560 } 561 pred = pred_func(data[x - 1], data + x - width); 562 AddPixelsEq(data + x, pred); 563 } 564 data += width; 565 ++y; 566 if ((y & mask) == 0) { // Use the same mask, since tiles are squares. 567 pred_mode_base += tiles_per_row; 568 } 569 } 570 } 571} 572 573void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) { 574 int i; 575 for (i = 0; i < num_pixs; ++i) { 576 const uint32_t argb = argb_data[i]; 577 const uint32_t green = (argb >> 8) & 0xff; 578 const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff; 579 const uint32_t new_b = ((argb & 0xff) - green) & 0xff; 580 argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b; 581 } 582} 583 584// Add green to blue and red channels (i.e. perform the inverse transform of 585// 'subtract green'). 586static void AddGreenToBlueAndRed(const VP8LTransform* const transform, 587 int y_start, int y_end, uint32_t* data) { 588 const int width = transform->xsize_; 589 const uint32_t* const data_end = data + (y_end - y_start) * width; 590 while (data < data_end) { 591 const uint32_t argb = *data; 592 // "* 0001001u" is equivalent to "(green << 16) + green)" 593 const uint32_t green = ((argb >> 8) & 0xff); 594 uint32_t red_blue = (argb & 0x00ff00ffu); 595 red_blue += (green << 16) | green; 596 red_blue &= 0x00ff00ffu; 597 *data++ = (argb & 0xff00ff00u) | red_blue; 598 } 599} 600 601typedef struct { 602 // Note: the members are uint8_t, so that any negative values are 603 // automatically converted to "mod 256" values. 604 uint8_t green_to_red_; 605 uint8_t green_to_blue_; 606 uint8_t red_to_blue_; 607} Multipliers; 608 609static WEBP_INLINE void MultipliersClear(Multipliers* m) { 610 m->green_to_red_ = 0; 611 m->green_to_blue_ = 0; 612 m->red_to_blue_ = 0; 613} 614 615static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred, 616 int8_t color) { 617 return (uint32_t)((int)(color_pred) * color) >> 5; 618} 619 620static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code, 621 Multipliers* const m) { 622 m->green_to_red_ = (color_code >> 0) & 0xff; 623 m->green_to_blue_ = (color_code >> 8) & 0xff; 624 m->red_to_blue_ = (color_code >> 16) & 0xff; 625} 626 627static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) { 628 return 0xff000000u | 629 ((uint32_t)(m->red_to_blue_) << 16) | 630 ((uint32_t)(m->green_to_blue_) << 8) | 631 m->green_to_red_; 632} 633 634static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m, 635 uint32_t argb, int inverse) { 636 const uint32_t green = argb >> 8; 637 const uint32_t red = argb >> 16; 638 uint32_t new_red = red; 639 uint32_t new_blue = argb; 640 641 if (inverse) { 642 new_red += ColorTransformDelta(m->green_to_red_, green); 643 new_red &= 0xff; 644 new_blue += ColorTransformDelta(m->green_to_blue_, green); 645 new_blue += ColorTransformDelta(m->red_to_blue_, new_red); 646 new_blue &= 0xff; 647 } else { 648 new_red -= ColorTransformDelta(m->green_to_red_, green); 649 new_red &= 0xff; 650 new_blue -= ColorTransformDelta(m->green_to_blue_, green); 651 new_blue -= ColorTransformDelta(m->red_to_blue_, red); 652 new_blue &= 0xff; 653 } 654 return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue); 655} 656 657static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb, 658 int ix, int xsize) { 659 const uint32_t v = argb[ix]; 660 if (ix >= xsize + 3) { 661 if (v == argb[ix - xsize] && 662 argb[ix - 1] == argb[ix - xsize - 1] && 663 argb[ix - 2] == argb[ix - xsize - 2] && 664 argb[ix - 3] == argb[ix - xsize - 3]) { 665 return 1; 666 } 667 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; 668 } else if (ix >= 3) { 669 return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1]; 670 } 671 return 0; 672} 673 674static float PredictionCostCrossColor(const int accumulated[256], 675 const int counts[256]) { 676 // Favor low entropy, locally and globally. 677 int i; 678 int combo[256]; 679 for (i = 0; i < 256; ++i) { 680 combo[i] = accumulated[i] + counts[i]; 681 } 682 return ShannonEntropy(combo, 256) + 683 ShannonEntropy(counts, 256) + 684 PredictionCostSpatial(counts, 3, 2.4); // Favor small absolute values. 685} 686 687static Multipliers GetBestColorTransformForTile( 688 int tile_x, int tile_y, int bits, 689 Multipliers prevX, 690 Multipliers prevY, 691 int step, int xsize, int ysize, 692 int* accumulated_red_histo, 693 int* accumulated_blue_histo, 694 const uint32_t* const argb) { 695 float best_diff = MAX_DIFF_COST; 696 float cur_diff; 697 const int halfstep = step / 2; 698 const int max_tile_size = 1 << bits; 699 const int tile_y_offset = tile_y * max_tile_size; 700 const int tile_x_offset = tile_x * max_tile_size; 701 int green_to_red; 702 int green_to_blue; 703 int red_to_blue; 704 int all_x_max = tile_x_offset + max_tile_size; 705 int all_y_max = tile_y_offset + max_tile_size; 706 Multipliers best_tx; 707 MultipliersClear(&best_tx); 708 if (all_x_max > xsize) { 709 all_x_max = xsize; 710 } 711 if (all_y_max > ysize) { 712 all_y_max = ysize; 713 } 714 for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) { 715 int histo[256] = { 0 }; 716 int all_y; 717 Multipliers tx; 718 MultipliersClear(&tx); 719 tx.green_to_red_ = green_to_red & 0xff; 720 721 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 722 uint32_t predict; 723 int ix = all_y * xsize + tile_x_offset; 724 int all_x; 725 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 726 if (SkipRepeatedPixels(argb, ix, xsize)) { 727 continue; 728 } 729 predict = TransformColor(&tx, argb[ix], 0); 730 ++histo[(predict >> 16) & 0xff]; // red. 731 } 732 } 733 cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]); 734 if (tx.green_to_red_ == prevX.green_to_red_) { 735 cur_diff -= 3; // favor keeping the areas locally similar 736 } 737 if (tx.green_to_red_ == prevY.green_to_red_) { 738 cur_diff -= 3; // favor keeping the areas locally similar 739 } 740 if (tx.green_to_red_ == 0) { 741 cur_diff -= 3; 742 } 743 if (cur_diff < best_diff) { 744 best_diff = cur_diff; 745 best_tx = tx; 746 } 747 } 748 best_diff = MAX_DIFF_COST; 749 green_to_red = best_tx.green_to_red_; 750 for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) { 751 for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) { 752 int all_y; 753 int histo[256] = { 0 }; 754 Multipliers tx; 755 tx.green_to_red_ = green_to_red; 756 tx.green_to_blue_ = green_to_blue; 757 tx.red_to_blue_ = red_to_blue; 758 for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) { 759 uint32_t predict; 760 int all_x; 761 int ix = all_y * xsize + tile_x_offset; 762 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 763 if (SkipRepeatedPixels(argb, ix, xsize)) { 764 continue; 765 } 766 predict = TransformColor(&tx, argb[ix], 0); 767 ++histo[predict & 0xff]; // blue. 768 } 769 } 770 cur_diff = 771 PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]); 772 if (tx.green_to_blue_ == prevX.green_to_blue_) { 773 cur_diff -= 3; // favor keeping the areas locally similar 774 } 775 if (tx.green_to_blue_ == prevY.green_to_blue_) { 776 cur_diff -= 3; // favor keeping the areas locally similar 777 } 778 if (tx.red_to_blue_ == prevX.red_to_blue_) { 779 cur_diff -= 3; // favor keeping the areas locally similar 780 } 781 if (tx.red_to_blue_ == prevY.red_to_blue_) { 782 cur_diff -= 3; // favor keeping the areas locally similar 783 } 784 if (tx.green_to_blue_ == 0) { 785 cur_diff -= 3; 786 } 787 if (tx.red_to_blue_ == 0) { 788 cur_diff -= 3; 789 } 790 if (cur_diff < best_diff) { 791 best_diff = cur_diff; 792 best_tx = tx; 793 } 794 } 795 } 796 return best_tx; 797} 798 799static void CopyTileWithColorTransform(int xsize, int ysize, 800 int tile_x, int tile_y, int bits, 801 Multipliers color_transform, 802 uint32_t* const argb) { 803 int y; 804 int xscan = 1 << bits; 805 int yscan = 1 << bits; 806 tile_x <<= bits; 807 tile_y <<= bits; 808 if (xscan > xsize - tile_x) { 809 xscan = xsize - tile_x; 810 } 811 if (yscan > ysize - tile_y) { 812 yscan = ysize - tile_y; 813 } 814 yscan += tile_y; 815 for (y = tile_y; y < yscan; ++y) { 816 int ix = y * xsize + tile_x; 817 const int end_ix = ix + xscan; 818 for (; ix < end_ix; ++ix) { 819 argb[ix] = TransformColor(&color_transform, argb[ix], 0); 820 } 821 } 822} 823 824void VP8LColorSpaceTransform(int width, int height, int bits, int step, 825 uint32_t* const argb, uint32_t* image) { 826 const int max_tile_size = 1 << bits; 827 int tile_xsize = VP8LSubSampleSize(width, bits); 828 int tile_ysize = VP8LSubSampleSize(height, bits); 829 int accumulated_red_histo[256] = { 0 }; 830 int accumulated_blue_histo[256] = { 0 }; 831 int tile_y; 832 int tile_x; 833 Multipliers prevX; 834 Multipliers prevY; 835 MultipliersClear(&prevY); 836 MultipliersClear(&prevX); 837 for (tile_y = 0; tile_y < tile_ysize; ++tile_y) { 838 for (tile_x = 0; tile_x < tile_xsize; ++tile_x) { 839 Multipliers color_transform; 840 int all_x_max; 841 int y; 842 const int tile_y_offset = tile_y * max_tile_size; 843 const int tile_x_offset = tile_x * max_tile_size; 844 if (tile_y != 0) { 845 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX); 846 ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x], 847 &prevY); 848 } else if (tile_x != 0) { 849 ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX); 850 } 851 color_transform = 852 GetBestColorTransformForTile(tile_x, tile_y, bits, 853 prevX, prevY, 854 step, width, height, 855 &accumulated_red_histo[0], 856 &accumulated_blue_histo[0], 857 argb); 858 image[tile_y * tile_xsize + tile_x] = 859 MultipliersToColorCode(&color_transform); 860 CopyTileWithColorTransform(width, height, tile_x, tile_y, bits, 861 color_transform, argb); 862 863 // Gather accumulated histogram data. 864 all_x_max = tile_x_offset + max_tile_size; 865 if (all_x_max > width) { 866 all_x_max = width; 867 } 868 for (y = 0; y < max_tile_size; ++y) { 869 int ix; 870 int all_x; 871 int all_y = tile_y_offset + y; 872 if (all_y >= height) { 873 break; 874 } 875 ix = all_y * width + tile_x_offset; 876 for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) { 877 if (ix >= 2 && 878 argb[ix] == argb[ix - 2] && 879 argb[ix] == argb[ix - 1]) { 880 continue; // repeated pixels are handled by backward references 881 } 882 if (ix >= width + 2 && 883 argb[ix - 2] == argb[ix - width - 2] && 884 argb[ix - 1] == argb[ix - width - 1] && 885 argb[ix] == argb[ix - width]) { 886 continue; // repeated pixels are handled by backward references 887 } 888 ++accumulated_red_histo[(argb[ix] >> 16) & 0xff]; 889 ++accumulated_blue_histo[argb[ix] & 0xff]; 890 } 891 } 892 } 893 } 894} 895 896// Color space inverse transform. 897static void ColorSpaceInverseTransform(const VP8LTransform* const transform, 898 int y_start, int y_end, uint32_t* data) { 899 const int width = transform->xsize_; 900 const int mask = (1 << transform->bits_) - 1; 901 const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_); 902 int y = y_start; 903 const uint32_t* pred_row = 904 transform->data_ + (y >> transform->bits_) * tiles_per_row; 905 906 while (y < y_end) { 907 const uint32_t* pred = pred_row; 908 Multipliers m = { 0, 0, 0 }; 909 int x; 910 911 for (x = 0; x < width; ++x) { 912 if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m); 913 data[x] = TransformColor(&m, data[x], 1); 914 } 915 data += width; 916 ++y; 917 if ((y & mask) == 0) pred_row += tiles_per_row;; 918 } 919} 920 921// Separate out pixels packed together using pixel-bundling. 922static void ColorIndexInverseTransform( 923 const VP8LTransform* const transform, 924 int y_start, int y_end, const uint32_t* src, uint32_t* dst) { 925 int y; 926 const int bits_per_pixel = 8 >> transform->bits_; 927 const int width = transform->xsize_; 928 const uint32_t* const color_map = transform->data_; 929 if (bits_per_pixel < 8) { 930 const int pixels_per_byte = 1 << transform->bits_; 931 const int count_mask = pixels_per_byte - 1; 932 const uint32_t bit_mask = (1 << bits_per_pixel) - 1; 933 for (y = y_start; y < y_end; ++y) { 934 uint32_t packed_pixels = 0; 935 int x; 936 for (x = 0; x < width; ++x) { 937 // We need to load fresh 'packed_pixels' once every 'pixels_per_byte' 938 // increments of x. Fortunately, pixels_per_byte is a power of 2, so 939 // can just use a mask for that, instead of decrementing a counter. 940 if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff; 941 *dst++ = color_map[packed_pixels & bit_mask]; 942 packed_pixels >>= bits_per_pixel; 943 } 944 } 945 } else { 946 for (y = y_start; y < y_end; ++y) { 947 int x; 948 for (x = 0; x < width; ++x) { 949 *dst++ = color_map[((*src++) >> 8) & 0xff]; 950 } 951 } 952 } 953} 954 955void VP8LInverseTransform(const VP8LTransform* const transform, 956 int row_start, int row_end, 957 const uint32_t* const in, uint32_t* const out) { 958 assert(row_start < row_end); 959 assert(row_end <= transform->ysize_); 960 switch (transform->type_) { 961 case SUBTRACT_GREEN: 962 AddGreenToBlueAndRed(transform, row_start, row_end, out); 963 break; 964 case PREDICTOR_TRANSFORM: 965 PredictorInverseTransform(transform, row_start, row_end, out); 966 if (row_end != transform->ysize_) { 967 // The last predicted row in this iteration will be the top-pred row 968 // for the first row in next iteration. 969 const int width = transform->xsize_; 970 memcpy(out - width, out + (row_end - row_start - 1) * width, 971 width * sizeof(*out)); 972 } 973 break; 974 case CROSS_COLOR_TRANSFORM: 975 ColorSpaceInverseTransform(transform, row_start, row_end, out); 976 break; 977 case COLOR_INDEXING_TRANSFORM: 978 if (in == out && transform->bits_ > 0) { 979 // Move packed pixels to the end of unpacked region, so that unpacking 980 // can occur seamlessly. 981 // Also, note that this is the only transform that applies on 982 // the effective width of VP8LSubSampleSize(xsize_, bits_). All other 983 // transforms work on effective width of xsize_. 984 const int out_stride = (row_end - row_start) * transform->xsize_; 985 const int in_stride = (row_end - row_start) * 986 VP8LSubSampleSize(transform->xsize_, transform->bits_); 987 uint32_t* const src = out + out_stride - in_stride; 988 memmove(src, out, in_stride * sizeof(*src)); 989 ColorIndexInverseTransform(transform, row_start, row_end, src, out); 990 } else { 991 ColorIndexInverseTransform(transform, row_start, row_end, in, out); 992 } 993 break; 994 } 995} 996 997//------------------------------------------------------------------------------ 998// Color space conversion. 999 1000static int is_big_endian(void) { 1001 static const union { 1002 uint16_t w; 1003 uint8_t b[2]; 1004 } tmp = { 1 }; 1005 return (tmp.b[0] != 1); 1006} 1007 1008static void ConvertBGRAToRGB(const uint32_t* src, 1009 int num_pixels, uint8_t* dst) { 1010 const uint32_t* const src_end = src + num_pixels; 1011 while (src < src_end) { 1012 const uint32_t argb = *src++; 1013 *dst++ = (argb >> 16) & 0xff; 1014 *dst++ = (argb >> 8) & 0xff; 1015 *dst++ = (argb >> 0) & 0xff; 1016 } 1017} 1018 1019static void ConvertBGRAToRGBA(const uint32_t* src, 1020 int num_pixels, uint8_t* dst) { 1021 const uint32_t* const src_end = src + num_pixels; 1022 while (src < src_end) { 1023 const uint32_t argb = *src++; 1024 *dst++ = (argb >> 16) & 0xff; 1025 *dst++ = (argb >> 8) & 0xff; 1026 *dst++ = (argb >> 0) & 0xff; 1027 *dst++ = (argb >> 24) & 0xff; 1028 } 1029} 1030 1031static void ConvertBGRAToRGBA4444(const uint32_t* src, 1032 int num_pixels, uint8_t* dst) { 1033 const uint32_t* const src_end = src + num_pixels; 1034 while (src < src_end) { 1035 const uint32_t argb = *src++; 1036 const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf); 1037 const uint8_t ba = ((argb >> 0) & 0xf0) | ((argb >> 28) & 0xf); 1038#ifdef WEBP_SWAP_16BIT_CSP 1039 *dst++ = ba; 1040 *dst++ = rg; 1041#else 1042 *dst++ = rg; 1043 *dst++ = ba; 1044#endif 1045 } 1046} 1047 1048static void ConvertBGRAToRGB565(const uint32_t* src, 1049 int num_pixels, uint8_t* dst) { 1050 const uint32_t* const src_end = src + num_pixels; 1051 while (src < src_end) { 1052 const uint32_t argb = *src++; 1053 const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7); 1054 const uint8_t gb = ((argb >> 5) & 0xe0) | ((argb >> 3) & 0x1f); 1055#ifdef WEBP_SWAP_16BIT_CSP 1056 *dst++ = gb; 1057 *dst++ = rg; 1058#else 1059 *dst++ = rg; 1060 *dst++ = gb; 1061#endif 1062 } 1063} 1064 1065static void ConvertBGRAToBGR(const uint32_t* src, 1066 int num_pixels, uint8_t* dst) { 1067 const uint32_t* const src_end = src + num_pixels; 1068 while (src < src_end) { 1069 const uint32_t argb = *src++; 1070 *dst++ = (argb >> 0) & 0xff; 1071 *dst++ = (argb >> 8) & 0xff; 1072 *dst++ = (argb >> 16) & 0xff; 1073 } 1074} 1075 1076static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst, 1077 int swap_on_big_endian) { 1078 if (is_big_endian() == swap_on_big_endian) { 1079 const uint32_t* const src_end = src + num_pixels; 1080 while (src < src_end) { 1081 uint32_t argb = *src++; 1082#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__)) 1083 __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb)); 1084 *(uint32_t*)dst = argb; 1085 dst += sizeof(argb); 1086#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER) 1087 argb = _byteswap_ulong(argb); 1088 *(uint32_t*)dst = argb; 1089 dst += sizeof(argb); 1090#else 1091 *dst++ = (argb >> 24) & 0xff; 1092 *dst++ = (argb >> 16) & 0xff; 1093 *dst++ = (argb >> 8) & 0xff; 1094 *dst++ = (argb >> 0) & 0xff; 1095#endif 1096 } 1097 } else { 1098 memcpy(dst, src, num_pixels * sizeof(*src)); 1099 } 1100} 1101 1102void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels, 1103 WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) { 1104 switch (out_colorspace) { 1105 case MODE_RGB: 1106 ConvertBGRAToRGB(in_data, num_pixels, rgba); 1107 break; 1108 case MODE_RGBA: 1109 ConvertBGRAToRGBA(in_data, num_pixels, rgba); 1110 break; 1111 case MODE_rgbA: 1112 ConvertBGRAToRGBA(in_data, num_pixels, rgba); 1113 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); 1114 break; 1115 case MODE_BGR: 1116 ConvertBGRAToBGR(in_data, num_pixels, rgba); 1117 break; 1118 case MODE_BGRA: 1119 CopyOrSwap(in_data, num_pixels, rgba, 1); 1120 break; 1121 case MODE_bgrA: 1122 CopyOrSwap(in_data, num_pixels, rgba, 1); 1123 WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0); 1124 break; 1125 case MODE_ARGB: 1126 CopyOrSwap(in_data, num_pixels, rgba, 0); 1127 break; 1128 case MODE_Argb: 1129 CopyOrSwap(in_data, num_pixels, rgba, 0); 1130 WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0); 1131 break; 1132 case MODE_RGBA_4444: 1133 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); 1134 break; 1135 case MODE_rgbA_4444: 1136 ConvertBGRAToRGBA4444(in_data, num_pixels, rgba); 1137 WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0); 1138 break; 1139 case MODE_RGB_565: 1140 ConvertBGRAToRGB565(in_data, num_pixels, rgba); 1141 break; 1142 default: 1143 assert(0); // Code flow should not reach here. 1144 } 1145} 1146 1147//------------------------------------------------------------------------------ 1148 1149#if defined(__cplusplus) || defined(c_plusplus) 1150} // extern "C" 1151#endif 1152