lossless.c revision 4b2196c929b70f2cdc1c2556580d349db89356d8
1// Copyright 2012 Google Inc. All Rights Reserved.
2//
3// This code is licensed under the same terms as WebM:
4//  Software License Agreement:  http://www.webmproject.org/license/software/
5//  Additional IP Rights Grant:  http://www.webmproject.org/license/additional/
6// -----------------------------------------------------------------------------
7//
8// Image transforms and color space conversion methods for lossless decoder.
9//
10// Authors: Vikas Arora (vikaas.arora@gmail.com)
11//          Jyrki Alakuijala (jyrki@google.com)
12//          Urvang Joshi (urvang@google.com)
13
14#if defined(__cplusplus) || defined(c_plusplus)
15extern "C" {
16#endif
17
18#include <math.h>
19#include <stdlib.h>
20#include "./lossless.h"
21#include "../dec/vp8li.h"
22#include "../dsp/yuv.h"
23#include "../dsp/dsp.h"
24
25#define MAX_DIFF_COST (1e30f)
26
27// lookup table for small values of log2(int)
28#define APPROX_LOG_MAX  4096
29#define LOG_2_RECIPROCAL 1.44269504088896338700465094007086
30#define LOG_LOOKUP_IDX_MAX 256
31static const float kLog2Table[LOG_LOOKUP_IDX_MAX] = {
32  0.0000000000000000f, 0.0000000000000000f,
33  1.0000000000000000f, 1.5849625007211560f,
34  2.0000000000000000f, 2.3219280948873621f,
35  2.5849625007211560f, 2.8073549220576041f,
36  3.0000000000000000f, 3.1699250014423121f,
37  3.3219280948873621f, 3.4594316186372973f,
38  3.5849625007211560f, 3.7004397181410921f,
39  3.8073549220576041f, 3.9068905956085187f,
40  4.0000000000000000f, 4.0874628412503390f,
41  4.1699250014423121f, 4.2479275134435852f,
42  4.3219280948873626f, 4.3923174227787606f,
43  4.4594316186372973f, 4.5235619560570130f,
44  4.5849625007211560f, 4.6438561897747243f,
45  4.7004397181410917f, 4.7548875021634682f,
46  4.8073549220576037f, 4.8579809951275718f,
47  4.9068905956085187f, 4.9541963103868749f,
48  5.0000000000000000f, 5.0443941193584533f,
49  5.0874628412503390f, 5.1292830169449663f,
50  5.1699250014423121f, 5.2094533656289501f,
51  5.2479275134435852f, 5.2854022188622487f,
52  5.3219280948873626f, 5.3575520046180837f,
53  5.3923174227787606f, 5.4262647547020979f,
54  5.4594316186372973f, 5.4918530963296747f,
55  5.5235619560570130f, 5.5545888516776376f,
56  5.5849625007211560f, 5.6147098441152083f,
57  5.6438561897747243f, 5.6724253419714951f,
58  5.7004397181410917f, 5.7279204545631987f,
59  5.7548875021634682f, 5.7813597135246599f,
60  5.8073549220576037f, 5.8328900141647412f,
61  5.8579809951275718f, 5.8826430493618415f,
62  5.9068905956085187f, 5.9307373375628866f,
63  5.9541963103868749f, 5.9772799234999167f,
64  6.0000000000000000f, 6.0223678130284543f,
65  6.0443941193584533f, 6.0660891904577720f,
66  6.0874628412503390f, 6.1085244567781691f,
67  6.1292830169449663f, 6.1497471195046822f,
68  6.1699250014423121f, 6.1898245588800175f,
69  6.2094533656289501f, 6.2288186904958804f,
70  6.2479275134435852f, 6.2667865406949010f,
71  6.2854022188622487f, 6.3037807481771030f,
72  6.3219280948873626f, 6.3398500028846243f,
73  6.3575520046180837f, 6.3750394313469245f,
74  6.3923174227787606f, 6.4093909361377017f,
75  6.4262647547020979f, 6.4429434958487279f,
76  6.4594316186372973f, 6.4757334309663976f,
77  6.4918530963296747f, 6.5077946401986963f,
78  6.5235619560570130f, 6.5391588111080309f,
79  6.5545888516776376f, 6.5698556083309478f,
80  6.5849625007211560f, 6.5999128421871278f,
81  6.6147098441152083f, 6.6293566200796094f,
82  6.6438561897747243f, 6.6582114827517946f,
83  6.6724253419714951f, 6.6865005271832185f,
84  6.7004397181410917f, 6.7142455176661224f,
85  6.7279204545631987f, 6.7414669864011464f,
86  6.7548875021634682f, 6.7681843247769259f,
87  6.7813597135246599f, 6.7944158663501061f,
88  6.8073549220576037f, 6.8201789624151878f,
89  6.8328900141647412f, 6.8454900509443747f,
90  6.8579809951275718f, 6.8703647195834047f,
91  6.8826430493618415f, 6.8948177633079437f,
92  6.9068905956085187f, 6.9188632372745946f,
93  6.9307373375628866f, 6.9425145053392398f,
94  6.9541963103868749f, 6.9657842846620869f,
95  6.9772799234999167f, 6.9886846867721654f,
96  7.0000000000000000f, 7.0112272554232539f,
97  7.0223678130284543f, 7.0334230015374501f,
98  7.0443941193584533f, 7.0552824355011898f,
99  7.0660891904577720f, 7.0768155970508308f,
100  7.0874628412503390f, 7.0980320829605263f,
101  7.1085244567781691f, 7.1189410727235076f,
102  7.1292830169449663f, 7.1395513523987936f,
103  7.1497471195046822f, 7.1598713367783890f,
104  7.1699250014423121f, 7.1799090900149344f,
105  7.1898245588800175f, 7.1996723448363644f,
106  7.2094533656289501f, 7.2191685204621611f,
107  7.2288186904958804f, 7.2384047393250785f,
108  7.2479275134435852f, 7.2573878426926521f,
109  7.2667865406949010f, 7.2761244052742375f,
110  7.2854022188622487f, 7.2946207488916270f,
111  7.3037807481771030f, 7.3128829552843557f,
112  7.3219280948873626f, 7.3309168781146167f,
113  7.3398500028846243f, 7.3487281542310771f,
114  7.3575520046180837f, 7.3663222142458160f,
115  7.3750394313469245f, 7.3837042924740519f,
116  7.3923174227787606f, 7.4008794362821843f,
117  7.4093909361377017f, 7.4178525148858982f,
118  7.4262647547020979f, 7.4346282276367245f,
119  7.4429434958487279f, 7.4512111118323289f,
120  7.4594316186372973f, 7.4676055500829976f,
121  7.4757334309663976f, 7.4838157772642563f,
122  7.4918530963296747f, 7.4998458870832056f,
123  7.5077946401986963f, 7.5156998382840427f,
124  7.5235619560570130f, 7.5313814605163118f,
125  7.5391588111080309f, 7.5468944598876364f,
126  7.5545888516776376f, 7.5622424242210728f,
127  7.5698556083309478f, 7.5774288280357486f,
128  7.5849625007211560f, 7.5924570372680806f,
129  7.5999128421871278f, 7.6073303137496104f,
130  7.6147098441152083f, 7.6220518194563764f,
131  7.6293566200796094f, 7.6366246205436487f,
132  7.6438561897747243f, 7.6510516911789281f,
133  7.6582114827517946f, 7.6653359171851764f,
134  7.6724253419714951f, 7.6794800995054464f,
135  7.6865005271832185f, 7.6934869574993252f,
136  7.7004397181410917f, 7.7073591320808825f,
137  7.7142455176661224f, 7.7210991887071855f,
138  7.7279204545631987f, 7.7347096202258383f,
139  7.7414669864011464f, 7.7481928495894605f,
140  7.7548875021634682f, 7.7615512324444795f,
141  7.7681843247769259f, 7.7747870596011736f,
142  7.7813597135246599f, 7.7879025593914317f,
143  7.7944158663501061f, 7.8008998999203047f,
144  7.8073549220576037f, 7.8137811912170374f,
145  7.8201789624151878f, 7.8265484872909150f,
146  7.8328900141647412f, 7.8392037880969436f,
147  7.8454900509443747f, 7.8517490414160571f,
148  7.8579809951275718f, 7.8641861446542797f,
149  7.8703647195834047f, 7.8765169465649993f,
150  7.8826430493618415f, 7.8887432488982591f,
151  7.8948177633079437f, 7.9008668079807486f,
152  7.9068905956085187f, 7.9128893362299619f,
153  7.9188632372745946f, 7.9248125036057812f,
154  7.9307373375628866f, 7.9366379390025709f,
155  7.9425145053392398f, 7.9483672315846778f,
156  7.9541963103868749f, 7.9600019320680805f,
157  7.9657842846620869f, 7.9715435539507719f,
158  7.9772799234999167f, 7.9829935746943103f,
159  7.9886846867721654f, 7.9943534368588577f
160};
161
162float VP8LFastLog2(int v) {
163  if (v < LOG_LOOKUP_IDX_MAX) {
164    return kLog2Table[v];
165  } else if (v < APPROX_LOG_MAX) {
166    int log_cnt = 0;
167    while (v >= LOG_LOOKUP_IDX_MAX) {
168      ++log_cnt;
169      v = v >> 1;
170    }
171    return kLog2Table[v] + (float)log_cnt;
172  } else {
173    return (float)(LOG_2_RECIPROCAL * log((double)v));
174  }
175}
176
177//------------------------------------------------------------------------------
178// Image transforms.
179
180// In-place sum of each component with mod 256.
181static WEBP_INLINE void AddPixelsEq(uint32_t* a, uint32_t b) {
182  const uint32_t alpha_and_green = (*a & 0xff00ff00u) + (b & 0xff00ff00u);
183  const uint32_t red_and_blue = (*a & 0x00ff00ffu) + (b & 0x00ff00ffu);
184  *a = (alpha_and_green & 0xff00ff00u) | (red_and_blue & 0x00ff00ffu);
185}
186
187static WEBP_INLINE uint32_t Average2(uint32_t a0, uint32_t a1) {
188  return (((a0 ^ a1) & 0xfefefefeL) >> 1) + (a0 & a1);
189}
190
191static WEBP_INLINE uint32_t Average3(uint32_t a0, uint32_t a1, uint32_t a2) {
192  return Average2(Average2(a0, a2), a1);
193}
194
195static WEBP_INLINE uint32_t Average4(uint32_t a0, uint32_t a1,
196                                     uint32_t a2, uint32_t a3) {
197  return Average2(Average2(a0, a1), Average2(a2, a3));
198}
199
200static WEBP_INLINE uint32_t Clip255(uint32_t a) {
201  if (a < 256) {
202    return a;
203  }
204  // return 0, when a is a negative integer.
205  // return 255, when a is positive.
206  return ~a >> 24;
207}
208
209static WEBP_INLINE int AddSubtractComponentFull(int a, int b, int c) {
210  return Clip255(a + b - c);
211}
212
213static WEBP_INLINE uint32_t ClampedAddSubtractFull(uint32_t c0, uint32_t c1,
214                                                   uint32_t c2) {
215  const int a = AddSubtractComponentFull(c0 >> 24, c1 >> 24, c2 >> 24);
216  const int r = AddSubtractComponentFull((c0 >> 16) & 0xff,
217                                         (c1 >> 16) & 0xff,
218                                         (c2 >> 16) & 0xff);
219  const int g = AddSubtractComponentFull((c0 >> 8) & 0xff,
220                                         (c1 >> 8) & 0xff,
221                                         (c2 >> 8) & 0xff);
222  const int b = AddSubtractComponentFull(c0 & 0xff, c1 & 0xff, c2 & 0xff);
223  return (a << 24) | (r << 16) | (g << 8) | b;
224}
225
226static WEBP_INLINE int AddSubtractComponentHalf(int a, int b) {
227  return Clip255(a + (a - b) / 2);
228}
229
230static WEBP_INLINE uint32_t ClampedAddSubtractHalf(uint32_t c0, uint32_t c1,
231                                                   uint32_t c2) {
232  const uint32_t ave = Average2(c0, c1);
233  const int a = AddSubtractComponentHalf(ave >> 24, c2 >> 24);
234  const int r = AddSubtractComponentHalf((ave >> 16) & 0xff, (c2 >> 16) & 0xff);
235  const int g = AddSubtractComponentHalf((ave >> 8) & 0xff, (c2 >> 8) & 0xff);
236  const int b = AddSubtractComponentHalf((ave >> 0) & 0xff, (c2 >> 0) & 0xff);
237  return (a << 24) | (r << 16) | (g << 8) | b;
238}
239
240static WEBP_INLINE int Sub3(int a, int b, int c) {
241  const int pa = b - c;
242  const int pb = a - c;
243  return abs(pa) - abs(pb);
244}
245
246static WEBP_INLINE uint32_t Select(uint32_t a, uint32_t b, uint32_t c) {
247  const int pa_minus_pb =
248      Sub3((a >> 24)       , (b >> 24)       , (c >> 24)       ) +
249      Sub3((a >> 16) & 0xff, (b >> 16) & 0xff, (c >> 16) & 0xff) +
250      Sub3((a >>  8) & 0xff, (b >>  8) & 0xff, (c >>  8) & 0xff) +
251      Sub3((a      ) & 0xff, (b      ) & 0xff, (c      ) & 0xff);
252
253  return (pa_minus_pb <= 0) ? a : b;
254}
255
256//------------------------------------------------------------------------------
257// Predictors
258
259static uint32_t Predictor0(uint32_t left, const uint32_t* const top) {
260  (void)top;
261  (void)left;
262  return ARGB_BLACK;
263}
264static uint32_t Predictor1(uint32_t left, const uint32_t* const top) {
265  (void)top;
266  return left;
267}
268static uint32_t Predictor2(uint32_t left, const uint32_t* const top) {
269  (void)left;
270  return top[0];
271}
272static uint32_t Predictor3(uint32_t left, const uint32_t* const top) {
273  (void)left;
274  return top[1];
275}
276static uint32_t Predictor4(uint32_t left, const uint32_t* const top) {
277  (void)left;
278  return top[-1];
279}
280static uint32_t Predictor5(uint32_t left, const uint32_t* const top) {
281  const uint32_t pred = Average3(left, top[0], top[1]);
282  return pred;
283}
284static uint32_t Predictor6(uint32_t left, const uint32_t* const top) {
285  const uint32_t pred = Average2(left, top[-1]);
286  return pred;
287}
288static uint32_t Predictor7(uint32_t left, const uint32_t* const top) {
289  const uint32_t pred = Average2(left, top[0]);
290  return pred;
291}
292static uint32_t Predictor8(uint32_t left, const uint32_t* const top) {
293  const uint32_t pred = Average2(top[-1], top[0]);
294  (void)left;
295  return pred;
296}
297static uint32_t Predictor9(uint32_t left, const uint32_t* const top) {
298  const uint32_t pred = Average2(top[0], top[1]);
299  (void)left;
300  return pred;
301}
302static uint32_t Predictor10(uint32_t left, const uint32_t* const top) {
303  const uint32_t pred = Average4(left, top[-1], top[0], top[1]);
304  return pred;
305}
306static uint32_t Predictor11(uint32_t left, const uint32_t* const top) {
307  const uint32_t pred = Select(top[0], left, top[-1]);
308  return pred;
309}
310static uint32_t Predictor12(uint32_t left, const uint32_t* const top) {
311  const uint32_t pred = ClampedAddSubtractFull(left, top[0], top[-1]);
312  return pred;
313}
314static uint32_t Predictor13(uint32_t left, const uint32_t* const top) {
315  const uint32_t pred = ClampedAddSubtractHalf(left, top[0], top[-1]);
316  return pred;
317}
318
319typedef uint32_t (*PredictorFunc)(uint32_t left, const uint32_t* const top);
320static const PredictorFunc kPredictors[16] = {
321  Predictor0, Predictor1, Predictor2, Predictor3,
322  Predictor4, Predictor5, Predictor6, Predictor7,
323  Predictor8, Predictor9, Predictor10, Predictor11,
324  Predictor12, Predictor13,
325  Predictor0, Predictor0    // <- padding security sentinels
326};
327
328// TODO(vikasa): Replace 256 etc with defines.
329static float PredictionCostSpatial(const int* counts,
330                                   int weight_0, double exp_val) {
331  const int significant_symbols = 16;
332  const double exp_decay_factor = 0.6;
333  double bits = weight_0 * counts[0];
334  int i;
335  for (i = 1; i < significant_symbols; ++i) {
336    bits += exp_val * (counts[i] + counts[256 - i]);
337    exp_val *= exp_decay_factor;
338  }
339  return (float)(-0.1 * bits);
340}
341
342// Compute the Shanon's entropy: Sum(p*log2(p))
343static float ShannonEntropy(const int* const array, int n) {
344  int i;
345  float retval = 0.f;
346  int sum = 0;
347  for (i = 0; i < n; ++i) {
348    if (array[i] != 0) {
349      sum += array[i];
350      retval -= VP8LFastSLog2(array[i]);
351    }
352  }
353  retval += VP8LFastSLog2(sum);
354  return retval;
355}
356
357static float PredictionCostSpatialHistogram(int accumulated[4][256],
358                                            int tile[4][256]) {
359  int i;
360  int k;
361  int combo[256];
362  double retval = 0;
363  for (i = 0; i < 4; ++i) {
364    const double exp_val = 0.94;
365    retval += PredictionCostSpatial(&tile[i][0], 1, exp_val);
366    retval += ShannonEntropy(&tile[i][0], 256);
367    for (k = 0; k < 256; ++k) {
368      combo[k] = accumulated[i][k] + tile[i][k];
369    }
370    retval += ShannonEntropy(&combo[0], 256);
371  }
372  return (float)retval;
373}
374
375static int GetBestPredictorForTile(int width, int height,
376                                   int tile_x, int tile_y, int bits,
377                                   int accumulated[4][256],
378                                   const uint32_t* const argb_scratch) {
379  const int kNumPredModes = 14;
380  const int col_start = tile_x << bits;
381  const int row_start = tile_y << bits;
382  const int tile_size = 1 << bits;
383  const int ymax = (tile_size <= height - row_start) ?
384      tile_size : height - row_start;
385  const int xmax = (tile_size <= width - col_start) ?
386      tile_size : width - col_start;
387  int histo[4][256];
388  float best_diff = MAX_DIFF_COST;
389  int best_mode = 0;
390
391  int mode;
392  for (mode = 0; mode < kNumPredModes; ++mode) {
393    const uint32_t* current_row = argb_scratch;
394    const PredictorFunc pred_func = kPredictors[mode];
395    float cur_diff;
396    int y;
397    memset(&histo[0][0], 0, sizeof(histo));
398    for (y = 0; y < ymax; ++y) {
399      int x;
400      const int row = row_start + y;
401      const uint32_t* const upper_row = current_row;
402      current_row = upper_row + width;
403      for (x = 0; x < xmax; ++x) {
404        const int col = col_start + x;
405        uint32_t predict;
406        uint32_t predict_diff;
407        if (row == 0) {
408          predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
409        } else if (col == 0) {
410          predict = upper_row[col];  // Top.
411        } else {
412          predict = pred_func(current_row[col - 1], upper_row + col);
413        }
414        predict_diff = VP8LSubPixels(current_row[col], predict);
415        ++histo[0][predict_diff >> 24];
416        ++histo[1][((predict_diff >> 16) & 0xff)];
417        ++histo[2][((predict_diff >> 8) & 0xff)];
418        ++histo[3][(predict_diff & 0xff)];
419      }
420    }
421    cur_diff = PredictionCostSpatialHistogram(accumulated, histo);
422    if (cur_diff < best_diff) {
423      best_diff = cur_diff;
424      best_mode = mode;
425    }
426  }
427
428  return best_mode;
429}
430
431static void CopyTileWithPrediction(int width, int height,
432                                   int tile_x, int tile_y, int bits, int mode,
433                                   const uint32_t* const argb_scratch,
434                                   uint32_t* const argb) {
435  const int col_start = tile_x << bits;
436  const int row_start = tile_y << bits;
437  const int tile_size = 1 << bits;
438  const int ymax = (tile_size <= height - row_start) ?
439      tile_size : height - row_start;
440  const int xmax = (tile_size <= width - col_start) ?
441      tile_size : width - col_start;
442  const PredictorFunc pred_func = kPredictors[mode];
443  const uint32_t* current_row = argb_scratch;
444
445  int y;
446  for (y = 0; y < ymax; ++y) {
447    int x;
448    const int row = row_start + y;
449    const uint32_t* const upper_row = current_row;
450    current_row = upper_row + width;
451    for (x = 0; x < xmax; ++x) {
452      const int col = col_start + x;
453      const int pix = row * width + col;
454      uint32_t predict;
455      if (row == 0) {
456        predict = (col == 0) ? ARGB_BLACK : current_row[col - 1];  // Left.
457      } else if (col == 0) {
458        predict = upper_row[col];  // Top.
459      } else {
460        predict = pred_func(current_row[col - 1], upper_row + col);
461      }
462      argb[pix] = VP8LSubPixels(current_row[col], predict);
463    }
464  }
465}
466
467void VP8LResidualImage(int width, int height, int bits,
468                       uint32_t* const argb, uint32_t* const argb_scratch,
469                       uint32_t* const image) {
470  const int max_tile_size = 1 << bits;
471  const int tiles_per_row = VP8LSubSampleSize(width, bits);
472  const int tiles_per_col = VP8LSubSampleSize(height, bits);
473  uint32_t* const upper_row = argb_scratch;
474  uint32_t* const current_tile_rows = argb_scratch + width;
475  int tile_y;
476  int histo[4][256];
477  memset(histo, 0, sizeof(histo));
478  for (tile_y = 0; tile_y < tiles_per_col; ++tile_y) {
479    const int tile_y_offset = tile_y * max_tile_size;
480    const int this_tile_height =
481        (tile_y < tiles_per_col - 1) ? max_tile_size : height - tile_y_offset;
482    int tile_x;
483    if (tile_y > 0) {
484      memcpy(upper_row, current_tile_rows + (max_tile_size - 1) * width,
485             width * sizeof(*upper_row));
486    }
487    memcpy(current_tile_rows, &argb[tile_y_offset * width],
488           this_tile_height * width * sizeof(*current_tile_rows));
489    for (tile_x = 0; tile_x < tiles_per_row; ++tile_x) {
490      int pred;
491      int y;
492      const int tile_x_offset = tile_x * max_tile_size;
493      int all_x_max = tile_x_offset + max_tile_size;
494      if (all_x_max > width) {
495        all_x_max = width;
496      }
497      pred = GetBestPredictorForTile(width, height, tile_x, tile_y, bits, histo,
498                                     argb_scratch);
499      image[tile_y * tiles_per_row + tile_x] = 0xff000000u | (pred << 8);
500      CopyTileWithPrediction(width, height, tile_x, tile_y, bits, pred,
501                             argb_scratch, argb);
502      for (y = 0; y < max_tile_size; ++y) {
503        int ix;
504        int all_x;
505        int all_y = tile_y_offset + y;
506        if (all_y >= height) {
507          break;
508        }
509        ix = all_y * width + tile_x_offset;
510        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
511          const uint32_t a = argb[ix];
512          ++histo[0][a >> 24];
513          ++histo[1][((a >> 16) & 0xff)];
514          ++histo[2][((a >> 8) & 0xff)];
515          ++histo[3][(a & 0xff)];
516        }
517      }
518    }
519  }
520}
521
522// Inverse prediction.
523static void PredictorInverseTransform(const VP8LTransform* const transform,
524                                      int y_start, int y_end, uint32_t* data) {
525  const int width = transform->xsize_;
526  if (y_start == 0) {  // First Row follows the L (mode=1) mode.
527    int x;
528    const uint32_t pred0 = Predictor0(data[-1], NULL);
529    AddPixelsEq(data, pred0);
530    for (x = 1; x < width; ++x) {
531      const uint32_t pred1 = Predictor1(data[x - 1], NULL);
532      AddPixelsEq(data + x, pred1);
533    }
534    data += width;
535    ++y_start;
536  }
537
538  {
539    int y = y_start;
540    const int mask = (1 << transform->bits_) - 1;
541    const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
542    const uint32_t* pred_mode_base =
543        transform->data_ + (y >> transform->bits_) * tiles_per_row;
544
545    while (y < y_end) {
546      int x;
547      const uint32_t pred2 = Predictor2(data[-1], data - width);
548      const uint32_t* pred_mode_src = pred_mode_base;
549      PredictorFunc pred_func;
550
551      // First pixel follows the T (mode=2) mode.
552      AddPixelsEq(data, pred2);
553
554      // .. the rest:
555      pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
556      for (x = 1; x < width; ++x) {
557        uint32_t pred;
558        if ((x & mask) == 0) {    // start of tile. Read predictor function.
559          pred_func = kPredictors[((*pred_mode_src++) >> 8) & 0xf];
560        }
561        pred = pred_func(data[x - 1], data + x - width);
562        AddPixelsEq(data + x, pred);
563      }
564      data += width;
565      ++y;
566      if ((y & mask) == 0) {   // Use the same mask, since tiles are squares.
567        pred_mode_base += tiles_per_row;
568      }
569    }
570  }
571}
572
573void VP8LSubtractGreenFromBlueAndRed(uint32_t* argb_data, int num_pixs) {
574  int i;
575  for (i = 0; i < num_pixs; ++i) {
576    const uint32_t argb = argb_data[i];
577    const uint32_t green = (argb >> 8) & 0xff;
578    const uint32_t new_r = (((argb >> 16) & 0xff) - green) & 0xff;
579    const uint32_t new_b = ((argb & 0xff) - green) & 0xff;
580    argb_data[i] = (argb & 0xff00ff00) | (new_r << 16) | new_b;
581  }
582}
583
584// Add green to blue and red channels (i.e. perform the inverse transform of
585// 'subtract green').
586static void AddGreenToBlueAndRed(const VP8LTransform* const transform,
587                                 int y_start, int y_end, uint32_t* data) {
588  const int width = transform->xsize_;
589  const uint32_t* const data_end = data + (y_end - y_start) * width;
590  while (data < data_end) {
591    const uint32_t argb = *data;
592    // "* 0001001u" is equivalent to "(green << 16) + green)"
593    const uint32_t green = ((argb >> 8) & 0xff);
594    uint32_t red_blue = (argb & 0x00ff00ffu);
595    red_blue += (green << 16) | green;
596    red_blue &= 0x00ff00ffu;
597    *data++ = (argb & 0xff00ff00u) | red_blue;
598  }
599}
600
601typedef struct {
602  // Note: the members are uint8_t, so that any negative values are
603  // automatically converted to "mod 256" values.
604  uint8_t green_to_red_;
605  uint8_t green_to_blue_;
606  uint8_t red_to_blue_;
607} Multipliers;
608
609static WEBP_INLINE void MultipliersClear(Multipliers* m) {
610  m->green_to_red_ = 0;
611  m->green_to_blue_ = 0;
612  m->red_to_blue_ = 0;
613}
614
615static WEBP_INLINE uint32_t ColorTransformDelta(int8_t color_pred,
616                                                int8_t color) {
617  return (uint32_t)((int)(color_pred) * color) >> 5;
618}
619
620static WEBP_INLINE void ColorCodeToMultipliers(uint32_t color_code,
621                                               Multipliers* const m) {
622  m->green_to_red_  = (color_code >>  0) & 0xff;
623  m->green_to_blue_ = (color_code >>  8) & 0xff;
624  m->red_to_blue_   = (color_code >> 16) & 0xff;
625}
626
627static WEBP_INLINE uint32_t MultipliersToColorCode(Multipliers* const m) {
628  return 0xff000000u |
629         ((uint32_t)(m->red_to_blue_) << 16) |
630         ((uint32_t)(m->green_to_blue_) << 8) |
631         m->green_to_red_;
632}
633
634static WEBP_INLINE uint32_t TransformColor(const Multipliers* const m,
635                                           uint32_t argb, int inverse) {
636  const uint32_t green = argb >> 8;
637  const uint32_t red = argb >> 16;
638  uint32_t new_red = red;
639  uint32_t new_blue = argb;
640
641  if (inverse) {
642    new_red += ColorTransformDelta(m->green_to_red_, green);
643    new_red &= 0xff;
644    new_blue += ColorTransformDelta(m->green_to_blue_, green);
645    new_blue += ColorTransformDelta(m->red_to_blue_, new_red);
646    new_blue &= 0xff;
647  } else {
648    new_red -= ColorTransformDelta(m->green_to_red_, green);
649    new_red &= 0xff;
650    new_blue -= ColorTransformDelta(m->green_to_blue_, green);
651    new_blue -= ColorTransformDelta(m->red_to_blue_, red);
652    new_blue &= 0xff;
653  }
654  return (argb & 0xff00ff00u) | (new_red << 16) | (new_blue);
655}
656
657static WEBP_INLINE int SkipRepeatedPixels(const uint32_t* const argb,
658                                          int ix, int xsize) {
659  const uint32_t v = argb[ix];
660  if (ix >= xsize + 3) {
661    if (v == argb[ix - xsize] &&
662        argb[ix - 1] == argb[ix - xsize - 1] &&
663        argb[ix - 2] == argb[ix - xsize - 2] &&
664        argb[ix - 3] == argb[ix - xsize - 3]) {
665      return 1;
666    }
667    return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
668  } else if (ix >= 3) {
669    return v == argb[ix - 3] && v == argb[ix - 2] && v == argb[ix - 1];
670  }
671  return 0;
672}
673
674static float PredictionCostCrossColor(const int accumulated[256],
675                                      const int counts[256]) {
676  // Favor low entropy, locally and globally.
677  int i;
678  int combo[256];
679  for (i = 0; i < 256; ++i) {
680    combo[i] = accumulated[i] + counts[i];
681  }
682  return ShannonEntropy(combo, 256) +
683         ShannonEntropy(counts, 256) +
684         PredictionCostSpatial(counts, 3, 2.4);  // Favor small absolute values.
685}
686
687static Multipliers GetBestColorTransformForTile(
688    int tile_x, int tile_y, int bits,
689    Multipliers prevX,
690    Multipliers prevY,
691    int step, int xsize, int ysize,
692    int* accumulated_red_histo,
693    int* accumulated_blue_histo,
694    const uint32_t* const argb) {
695  float best_diff = MAX_DIFF_COST;
696  float cur_diff;
697  const int halfstep = step / 2;
698  const int max_tile_size = 1 << bits;
699  const int tile_y_offset = tile_y * max_tile_size;
700  const int tile_x_offset = tile_x * max_tile_size;
701  int green_to_red;
702  int green_to_blue;
703  int red_to_blue;
704  int all_x_max = tile_x_offset + max_tile_size;
705  int all_y_max = tile_y_offset + max_tile_size;
706  Multipliers best_tx;
707  MultipliersClear(&best_tx);
708  if (all_x_max > xsize) {
709    all_x_max = xsize;
710  }
711  if (all_y_max > ysize) {
712    all_y_max = ysize;
713  }
714  for (green_to_red = -64; green_to_red <= 64; green_to_red += halfstep) {
715    int histo[256] = { 0 };
716    int all_y;
717    Multipliers tx;
718    MultipliersClear(&tx);
719    tx.green_to_red_ = green_to_red & 0xff;
720
721    for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
722      uint32_t predict;
723      int ix = all_y * xsize + tile_x_offset;
724      int all_x;
725      for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
726        if (SkipRepeatedPixels(argb, ix, xsize)) {
727          continue;
728        }
729        predict = TransformColor(&tx, argb[ix], 0);
730        ++histo[(predict >> 16) & 0xff];  // red.
731      }
732    }
733    cur_diff = PredictionCostCrossColor(&accumulated_red_histo[0], &histo[0]);
734    if (tx.green_to_red_ == prevX.green_to_red_) {
735      cur_diff -= 3;  // favor keeping the areas locally similar
736    }
737    if (tx.green_to_red_ == prevY.green_to_red_) {
738      cur_diff -= 3;  // favor keeping the areas locally similar
739    }
740    if (tx.green_to_red_ == 0) {
741      cur_diff -= 3;
742    }
743    if (cur_diff < best_diff) {
744      best_diff = cur_diff;
745      best_tx = tx;
746    }
747  }
748  best_diff = MAX_DIFF_COST;
749  green_to_red = best_tx.green_to_red_;
750  for (green_to_blue = -32; green_to_blue <= 32; green_to_blue += step) {
751    for (red_to_blue = -32; red_to_blue <= 32; red_to_blue += step) {
752      int all_y;
753      int histo[256] = { 0 };
754      Multipliers tx;
755      tx.green_to_red_ = green_to_red;
756      tx.green_to_blue_ = green_to_blue;
757      tx.red_to_blue_ = red_to_blue;
758      for (all_y = tile_y_offset; all_y < all_y_max; ++all_y) {
759        uint32_t predict;
760        int all_x;
761        int ix = all_y * xsize + tile_x_offset;
762        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
763          if (SkipRepeatedPixels(argb, ix, xsize)) {
764            continue;
765          }
766          predict = TransformColor(&tx, argb[ix], 0);
767          ++histo[predict & 0xff];  // blue.
768        }
769      }
770      cur_diff =
771        PredictionCostCrossColor(&accumulated_blue_histo[0], &histo[0]);
772      if (tx.green_to_blue_ == prevX.green_to_blue_) {
773        cur_diff -= 3;  // favor keeping the areas locally similar
774      }
775      if (tx.green_to_blue_ == prevY.green_to_blue_) {
776        cur_diff -= 3;  // favor keeping the areas locally similar
777      }
778      if (tx.red_to_blue_ == prevX.red_to_blue_) {
779        cur_diff -= 3;  // favor keeping the areas locally similar
780      }
781      if (tx.red_to_blue_ == prevY.red_to_blue_) {
782        cur_diff -= 3;  // favor keeping the areas locally similar
783      }
784      if (tx.green_to_blue_ == 0) {
785        cur_diff -= 3;
786      }
787      if (tx.red_to_blue_ == 0) {
788        cur_diff -= 3;
789      }
790      if (cur_diff < best_diff) {
791        best_diff = cur_diff;
792        best_tx = tx;
793      }
794    }
795  }
796  return best_tx;
797}
798
799static void CopyTileWithColorTransform(int xsize, int ysize,
800                                       int tile_x, int tile_y, int bits,
801                                       Multipliers color_transform,
802                                       uint32_t* const argb) {
803  int y;
804  int xscan = 1 << bits;
805  int yscan = 1 << bits;
806  tile_x <<= bits;
807  tile_y <<= bits;
808  if (xscan > xsize - tile_x) {
809    xscan = xsize - tile_x;
810  }
811  if (yscan > ysize - tile_y) {
812    yscan = ysize - tile_y;
813  }
814  yscan += tile_y;
815  for (y = tile_y; y < yscan; ++y) {
816    int ix = y * xsize + tile_x;
817    const int end_ix = ix + xscan;
818    for (; ix < end_ix; ++ix) {
819      argb[ix] = TransformColor(&color_transform, argb[ix], 0);
820    }
821  }
822}
823
824void VP8LColorSpaceTransform(int width, int height, int bits, int step,
825                             uint32_t* const argb, uint32_t* image) {
826  const int max_tile_size = 1 << bits;
827  int tile_xsize = VP8LSubSampleSize(width, bits);
828  int tile_ysize = VP8LSubSampleSize(height, bits);
829  int accumulated_red_histo[256] = { 0 };
830  int accumulated_blue_histo[256] = { 0 };
831  int tile_y;
832  int tile_x;
833  Multipliers prevX;
834  Multipliers prevY;
835  MultipliersClear(&prevY);
836  MultipliersClear(&prevX);
837  for (tile_y = 0; tile_y < tile_ysize; ++tile_y) {
838    for (tile_x = 0; tile_x < tile_xsize; ++tile_x) {
839      Multipliers color_transform;
840      int all_x_max;
841      int y;
842      const int tile_y_offset = tile_y * max_tile_size;
843      const int tile_x_offset = tile_x * max_tile_size;
844      if (tile_y != 0) {
845        ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
846        ColorCodeToMultipliers(image[(tile_y - 1) * tile_xsize + tile_x],
847                               &prevY);
848      } else if (tile_x != 0) {
849        ColorCodeToMultipliers(image[tile_y * tile_xsize + tile_x - 1], &prevX);
850      }
851      color_transform =
852          GetBestColorTransformForTile(tile_x, tile_y, bits,
853                                       prevX, prevY,
854                                       step, width, height,
855                                       &accumulated_red_histo[0],
856                                       &accumulated_blue_histo[0],
857                                       argb);
858      image[tile_y * tile_xsize + tile_x] =
859          MultipliersToColorCode(&color_transform);
860      CopyTileWithColorTransform(width, height, tile_x, tile_y, bits,
861                                 color_transform, argb);
862
863      // Gather accumulated histogram data.
864      all_x_max = tile_x_offset + max_tile_size;
865      if (all_x_max > width) {
866        all_x_max = width;
867      }
868      for (y = 0; y < max_tile_size; ++y) {
869        int ix;
870        int all_x;
871        int all_y = tile_y_offset + y;
872        if (all_y >= height) {
873          break;
874        }
875        ix = all_y * width + tile_x_offset;
876        for (all_x = tile_x_offset; all_x < all_x_max; ++all_x, ++ix) {
877          if (ix >= 2 &&
878              argb[ix] == argb[ix - 2] &&
879              argb[ix] == argb[ix - 1]) {
880            continue;  // repeated pixels are handled by backward references
881          }
882          if (ix >= width + 2 &&
883              argb[ix - 2] == argb[ix - width - 2] &&
884              argb[ix - 1] == argb[ix - width - 1] &&
885              argb[ix] == argb[ix - width]) {
886            continue;  // repeated pixels are handled by backward references
887          }
888          ++accumulated_red_histo[(argb[ix] >> 16) & 0xff];
889          ++accumulated_blue_histo[argb[ix] & 0xff];
890        }
891      }
892    }
893  }
894}
895
896// Color space inverse transform.
897static void ColorSpaceInverseTransform(const VP8LTransform* const transform,
898                                       int y_start, int y_end, uint32_t* data) {
899  const int width = transform->xsize_;
900  const int mask = (1 << transform->bits_) - 1;
901  const int tiles_per_row = VP8LSubSampleSize(width, transform->bits_);
902  int y = y_start;
903  const uint32_t* pred_row =
904      transform->data_ + (y >> transform->bits_) * tiles_per_row;
905
906  while (y < y_end) {
907    const uint32_t* pred = pred_row;
908    Multipliers m = { 0, 0, 0 };
909    int x;
910
911    for (x = 0; x < width; ++x) {
912      if ((x & mask) == 0) ColorCodeToMultipliers(*pred++, &m);
913      data[x] = TransformColor(&m, data[x], 1);
914    }
915    data += width;
916    ++y;
917    if ((y & mask) == 0) pred_row += tiles_per_row;;
918  }
919}
920
921// Separate out pixels packed together using pixel-bundling.
922static void ColorIndexInverseTransform(
923    const VP8LTransform* const transform,
924    int y_start, int y_end, const uint32_t* src, uint32_t* dst) {
925  int y;
926  const int bits_per_pixel = 8 >> transform->bits_;
927  const int width = transform->xsize_;
928  const uint32_t* const color_map = transform->data_;
929  if (bits_per_pixel < 8) {
930    const int pixels_per_byte = 1 << transform->bits_;
931    const int count_mask = pixels_per_byte - 1;
932    const uint32_t bit_mask = (1 << bits_per_pixel) - 1;
933    for (y = y_start; y < y_end; ++y) {
934      uint32_t packed_pixels = 0;
935      int x;
936      for (x = 0; x < width; ++x) {
937        // We need to load fresh 'packed_pixels' once every 'pixels_per_byte'
938        // increments of x. Fortunately, pixels_per_byte is a power of 2, so
939        // can just use a mask for that, instead of decrementing a counter.
940        if ((x & count_mask) == 0) packed_pixels = ((*src++) >> 8) & 0xff;
941        *dst++ = color_map[packed_pixels & bit_mask];
942        packed_pixels >>= bits_per_pixel;
943      }
944    }
945  } else {
946    for (y = y_start; y < y_end; ++y) {
947      int x;
948      for (x = 0; x < width; ++x) {
949        *dst++ = color_map[((*src++) >> 8) & 0xff];
950      }
951    }
952  }
953}
954
955void VP8LInverseTransform(const VP8LTransform* const transform,
956                          int row_start, int row_end,
957                          const uint32_t* const in, uint32_t* const out) {
958  assert(row_start < row_end);
959  assert(row_end <= transform->ysize_);
960  switch (transform->type_) {
961    case SUBTRACT_GREEN:
962      AddGreenToBlueAndRed(transform, row_start, row_end, out);
963      break;
964    case PREDICTOR_TRANSFORM:
965      PredictorInverseTransform(transform, row_start, row_end, out);
966      if (row_end != transform->ysize_) {
967        // The last predicted row in this iteration will be the top-pred row
968        // for the first row in next iteration.
969        const int width = transform->xsize_;
970        memcpy(out - width, out + (row_end - row_start - 1) * width,
971               width * sizeof(*out));
972      }
973      break;
974    case CROSS_COLOR_TRANSFORM:
975      ColorSpaceInverseTransform(transform, row_start, row_end, out);
976      break;
977    case COLOR_INDEXING_TRANSFORM:
978      if (in == out && transform->bits_ > 0) {
979        // Move packed pixels to the end of unpacked region, so that unpacking
980        // can occur seamlessly.
981        // Also, note that this is the only transform that applies on
982        // the effective width of VP8LSubSampleSize(xsize_, bits_). All other
983        // transforms work on effective width of xsize_.
984        const int out_stride = (row_end - row_start) * transform->xsize_;
985        const int in_stride = (row_end - row_start) *
986            VP8LSubSampleSize(transform->xsize_, transform->bits_);
987        uint32_t* const src = out + out_stride - in_stride;
988        memmove(src, out, in_stride * sizeof(*src));
989        ColorIndexInverseTransform(transform, row_start, row_end, src, out);
990      } else {
991        ColorIndexInverseTransform(transform, row_start, row_end, in, out);
992      }
993      break;
994  }
995}
996
997//------------------------------------------------------------------------------
998// Color space conversion.
999
1000static int is_big_endian(void) {
1001  static const union {
1002    uint16_t w;
1003    uint8_t b[2];
1004  } tmp = { 1 };
1005  return (tmp.b[0] != 1);
1006}
1007
1008static void ConvertBGRAToRGB(const uint32_t* src,
1009                             int num_pixels, uint8_t* dst) {
1010  const uint32_t* const src_end = src + num_pixels;
1011  while (src < src_end) {
1012    const uint32_t argb = *src++;
1013    *dst++ = (argb >> 16) & 0xff;
1014    *dst++ = (argb >>  8) & 0xff;
1015    *dst++ = (argb >>  0) & 0xff;
1016  }
1017}
1018
1019static void ConvertBGRAToRGBA(const uint32_t* src,
1020                              int num_pixels, uint8_t* dst) {
1021  const uint32_t* const src_end = src + num_pixels;
1022  while (src < src_end) {
1023    const uint32_t argb = *src++;
1024    *dst++ = (argb >> 16) & 0xff;
1025    *dst++ = (argb >>  8) & 0xff;
1026    *dst++ = (argb >>  0) & 0xff;
1027    *dst++ = (argb >> 24) & 0xff;
1028  }
1029}
1030
1031static void ConvertBGRAToRGBA4444(const uint32_t* src,
1032                                  int num_pixels, uint8_t* dst) {
1033  const uint32_t* const src_end = src + num_pixels;
1034  while (src < src_end) {
1035    const uint32_t argb = *src++;
1036    const uint8_t rg = ((argb >> 16) & 0xf0) | ((argb >> 12) & 0xf);
1037    const uint8_t ba = ((argb >>  0) & 0xf0) | ((argb >> 28) & 0xf);
1038#ifdef WEBP_SWAP_16BIT_CSP
1039    *dst++ = ba;
1040    *dst++ = rg;
1041#else
1042    *dst++ = rg;
1043    *dst++ = ba;
1044#endif
1045  }
1046}
1047
1048static void ConvertBGRAToRGB565(const uint32_t* src,
1049                                int num_pixels, uint8_t* dst) {
1050  const uint32_t* const src_end = src + num_pixels;
1051  while (src < src_end) {
1052    const uint32_t argb = *src++;
1053    const uint8_t rg = ((argb >> 16) & 0xf8) | ((argb >> 13) & 0x7);
1054    const uint8_t gb = ((argb >>  5) & 0xe0) | ((argb >>  3) & 0x1f);
1055#ifdef WEBP_SWAP_16BIT_CSP
1056    *dst++ = gb;
1057    *dst++ = rg;
1058#else
1059    *dst++ = rg;
1060    *dst++ = gb;
1061#endif
1062  }
1063}
1064
1065static void ConvertBGRAToBGR(const uint32_t* src,
1066                             int num_pixels, uint8_t* dst) {
1067  const uint32_t* const src_end = src + num_pixels;
1068  while (src < src_end) {
1069    const uint32_t argb = *src++;
1070    *dst++ = (argb >>  0) & 0xff;
1071    *dst++ = (argb >>  8) & 0xff;
1072    *dst++ = (argb >> 16) & 0xff;
1073  }
1074}
1075
1076static void CopyOrSwap(const uint32_t* src, int num_pixels, uint8_t* dst,
1077                       int swap_on_big_endian) {
1078  if (is_big_endian() == swap_on_big_endian) {
1079    const uint32_t* const src_end = src + num_pixels;
1080    while (src < src_end) {
1081      uint32_t argb = *src++;
1082#if !defined(__BIG_ENDIAN__) && (defined(__i386__) || defined(__x86_64__))
1083      __asm__ volatile("bswap %0" : "=r"(argb) : "0"(argb));
1084      *(uint32_t*)dst = argb;
1085      dst += sizeof(argb);
1086#elif !defined(__BIG_ENDIAN__) && defined(_MSC_VER)
1087      argb = _byteswap_ulong(argb);
1088      *(uint32_t*)dst = argb;
1089      dst += sizeof(argb);
1090#else
1091      *dst++ = (argb >> 24) & 0xff;
1092      *dst++ = (argb >> 16) & 0xff;
1093      *dst++ = (argb >>  8) & 0xff;
1094      *dst++ = (argb >>  0) & 0xff;
1095#endif
1096    }
1097  } else {
1098    memcpy(dst, src, num_pixels * sizeof(*src));
1099  }
1100}
1101
1102void VP8LConvertFromBGRA(const uint32_t* const in_data, int num_pixels,
1103                         WEBP_CSP_MODE out_colorspace, uint8_t* const rgba) {
1104  switch (out_colorspace) {
1105    case MODE_RGB:
1106      ConvertBGRAToRGB(in_data, num_pixels, rgba);
1107      break;
1108    case MODE_RGBA:
1109      ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1110      break;
1111    case MODE_rgbA:
1112      ConvertBGRAToRGBA(in_data, num_pixels, rgba);
1113      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1114      break;
1115    case MODE_BGR:
1116      ConvertBGRAToBGR(in_data, num_pixels, rgba);
1117      break;
1118    case MODE_BGRA:
1119      CopyOrSwap(in_data, num_pixels, rgba, 1);
1120      break;
1121    case MODE_bgrA:
1122      CopyOrSwap(in_data, num_pixels, rgba, 1);
1123      WebPApplyAlphaMultiply(rgba, 0, num_pixels, 1, 0);
1124      break;
1125    case MODE_ARGB:
1126      CopyOrSwap(in_data, num_pixels, rgba, 0);
1127      break;
1128    case MODE_Argb:
1129      CopyOrSwap(in_data, num_pixels, rgba, 0);
1130      WebPApplyAlphaMultiply(rgba, 1, num_pixels, 1, 0);
1131      break;
1132    case MODE_RGBA_4444:
1133      ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1134      break;
1135    case MODE_rgbA_4444:
1136      ConvertBGRAToRGBA4444(in_data, num_pixels, rgba);
1137      WebPApplyAlphaMultiply4444(rgba, num_pixels, 1, 0);
1138      break;
1139    case MODE_RGB_565:
1140      ConvertBGRAToRGB565(in_data, num_pixels, rgba);
1141      break;
1142    default:
1143      assert(0);          // Code flow should not reach here.
1144  }
1145}
1146
1147//------------------------------------------------------------------------------
1148
1149#if defined(__cplusplus) || defined(c_plusplus)
1150}    // extern "C"
1151#endif
1152