1// Copyright 2012 Google Inc. All Rights Reserved.
2//
3// Use of this source code is governed by a BSD-style license
4// that can be found in the COPYING file in the root of the source
5// tree. An additional intellectual property rights grant can be found
6// in the file PATENTS. All contributing project authors may
7// be found in the AUTHORS file in the root of the source tree.
8// -----------------------------------------------------------------------------
9//
10// Author: Jyrki Alakuijala (jyrki@google.com)
11//
12#ifdef HAVE_CONFIG_H
13#include "config.h"
14#endif
15
16#include <math.h>
17#include <stdio.h>
18
19#include "./backward_references.h"
20#include "./histogram.h"
21#include "../dsp/lossless.h"
22#include "../utils/utils.h"
23
24static void HistogramClear(VP8LHistogram* const p) {
25  memset(p->literal_, 0, sizeof(p->literal_));
26  memset(p->red_, 0, sizeof(p->red_));
27  memset(p->blue_, 0, sizeof(p->blue_));
28  memset(p->alpha_, 0, sizeof(p->alpha_));
29  memset(p->distance_, 0, sizeof(p->distance_));
30  p->bit_cost_ = 0;
31}
32
33void VP8LHistogramStoreRefs(const VP8LBackwardRefs* const refs,
34                            VP8LHistogram* const histo) {
35  int i;
36  for (i = 0; i < refs->size; ++i) {
37    VP8LHistogramAddSinglePixOrCopy(histo, &refs->refs[i]);
38  }
39}
40
41void VP8LHistogramCreate(VP8LHistogram* const p,
42                         const VP8LBackwardRefs* const refs,
43                         int palette_code_bits) {
44  if (palette_code_bits >= 0) {
45    p->palette_code_bits_ = palette_code_bits;
46  }
47  HistogramClear(p);
48  VP8LHistogramStoreRefs(refs, p);
49}
50
51void VP8LHistogramInit(VP8LHistogram* const p, int palette_code_bits) {
52  p->palette_code_bits_ = palette_code_bits;
53  HistogramClear(p);
54}
55
56VP8LHistogramSet* VP8LAllocateHistogramSet(int size, int cache_bits) {
57  int i;
58  VP8LHistogramSet* set;
59  VP8LHistogram* bulk;
60  const uint64_t total_size = sizeof(*set)
61                            + (uint64_t)size * sizeof(*set->histograms)
62                            + (uint64_t)size * sizeof(**set->histograms);
63  uint8_t* memory = (uint8_t*)WebPSafeMalloc(total_size, sizeof(*memory));
64  if (memory == NULL) return NULL;
65
66  set = (VP8LHistogramSet*)memory;
67  memory += sizeof(*set);
68  set->histograms = (VP8LHistogram**)memory;
69  memory += size * sizeof(*set->histograms);
70  bulk = (VP8LHistogram*)memory;
71  set->max_size = size;
72  set->size = size;
73  for (i = 0; i < size; ++i) {
74    set->histograms[i] = bulk + i;
75    VP8LHistogramInit(set->histograms[i], cache_bits);
76  }
77  return set;
78}
79
80// -----------------------------------------------------------------------------
81
82void VP8LHistogramAddSinglePixOrCopy(VP8LHistogram* const histo,
83                                     const PixOrCopy* const v) {
84  if (PixOrCopyIsLiteral(v)) {
85    ++histo->alpha_[PixOrCopyLiteral(v, 3)];
86    ++histo->red_[PixOrCopyLiteral(v, 2)];
87    ++histo->literal_[PixOrCopyLiteral(v, 1)];
88    ++histo->blue_[PixOrCopyLiteral(v, 0)];
89  } else if (PixOrCopyIsCacheIdx(v)) {
90    int literal_ix = 256 + NUM_LENGTH_CODES + PixOrCopyCacheIdx(v);
91    ++histo->literal_[literal_ix];
92  } else {
93    int code, extra_bits_count, extra_bits_value;
94    PrefixEncode(PixOrCopyLength(v),
95                 &code, &extra_bits_count, &extra_bits_value);
96    ++histo->literal_[256 + code];
97    PrefixEncode(PixOrCopyDistance(v),
98                 &code, &extra_bits_count, &extra_bits_value);
99    ++histo->distance_[code];
100  }
101}
102
103static double BitsEntropy(const int* const array, int n) {
104  double retval = 0.;
105  int sum = 0;
106  int nonzeros = 0;
107  int max_val = 0;
108  int i;
109  double mix;
110  for (i = 0; i < n; ++i) {
111    if (array[i] != 0) {
112      sum += array[i];
113      ++nonzeros;
114      retval -= VP8LFastSLog2(array[i]);
115      if (max_val < array[i]) {
116        max_val = array[i];
117      }
118    }
119  }
120  retval += VP8LFastSLog2(sum);
121
122  if (nonzeros < 5) {
123    if (nonzeros <= 1) {
124      return 0;
125    }
126    // Two symbols, they will be 0 and 1 in a Huffman code.
127    // Let's mix in a bit of entropy to favor good clustering when
128    // distributions of these are combined.
129    if (nonzeros == 2) {
130      return 0.99 * sum + 0.01 * retval;
131    }
132    // No matter what the entropy says, we cannot be better than min_limit
133    // with Huffman coding. I am mixing a bit of entropy into the
134    // min_limit since it produces much better (~0.5 %) compression results
135    // perhaps because of better entropy clustering.
136    if (nonzeros == 3) {
137      mix = 0.95;
138    } else {
139      mix = 0.7;  // nonzeros == 4.
140    }
141  } else {
142    mix = 0.627;
143  }
144
145  {
146    double min_limit = 2 * sum - max_val;
147    min_limit = mix * min_limit + (1.0 - mix) * retval;
148    return (retval < min_limit) ? min_limit : retval;
149  }
150}
151
152// Returns the cost encode the rle-encoded entropy code.
153// The constants in this function are experimental.
154static double HuffmanCost(const int* const population, int length) {
155  // Small bias because Huffman code length is typically not stored in
156  // full length.
157  static const int kHuffmanCodeOfHuffmanCodeSize = CODE_LENGTH_CODES * 3;
158  static const double kSmallBias = 9.1;
159  double retval = kHuffmanCodeOfHuffmanCodeSize - kSmallBias;
160  int streak = 0;
161  int i = 0;
162  for (; i < length - 1; ++i) {
163    ++streak;
164    if (population[i] == population[i + 1]) {
165      continue;
166    }
167 last_streak_hack:
168    // population[i] points now to the symbol in the streak of same values.
169    if (streak > 3) {
170      if (population[i] == 0) {
171        retval += 1.5625 + 0.234375 * streak;
172      } else {
173        retval += 2.578125 + 0.703125 * streak;
174      }
175    } else {
176      if (population[i] == 0) {
177        retval += 1.796875 * streak;
178      } else {
179        retval += 3.28125 * streak;
180      }
181    }
182    streak = 0;
183  }
184  if (i == length - 1) {
185    ++streak;
186    goto last_streak_hack;
187  }
188  return retval;
189}
190
191static double PopulationCost(const int* const population, int length) {
192  return BitsEntropy(population, length) + HuffmanCost(population, length);
193}
194
195static double ExtraCost(const int* const population, int length) {
196  int i;
197  double cost = 0.;
198  for (i = 2; i < length - 2; ++i) cost += (i >> 1) * population[i + 2];
199  return cost;
200}
201
202// Estimates the Entropy + Huffman + other block overhead size cost.
203double VP8LHistogramEstimateBits(const VP8LHistogram* const p) {
204  return PopulationCost(p->literal_, VP8LHistogramNumCodes(p))
205       + PopulationCost(p->red_, 256)
206       + PopulationCost(p->blue_, 256)
207       + PopulationCost(p->alpha_, 256)
208       + PopulationCost(p->distance_, NUM_DISTANCE_CODES)
209       + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
210       + ExtraCost(p->distance_, NUM_DISTANCE_CODES);
211}
212
213double VP8LHistogramEstimateBitsBulk(const VP8LHistogram* const p) {
214  return BitsEntropy(p->literal_, VP8LHistogramNumCodes(p))
215       + BitsEntropy(p->red_, 256)
216       + BitsEntropy(p->blue_, 256)
217       + BitsEntropy(p->alpha_, 256)
218       + BitsEntropy(p->distance_, NUM_DISTANCE_CODES)
219       + ExtraCost(p->literal_ + 256, NUM_LENGTH_CODES)
220       + ExtraCost(p->distance_, NUM_DISTANCE_CODES);
221}
222
223// -----------------------------------------------------------------------------
224// Various histogram combine/cost-eval functions
225
226// Adds 'in' histogram to 'out'
227static void HistogramAdd(const VP8LHistogram* const in,
228                         VP8LHistogram* const out) {
229  int i;
230  for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
231    out->literal_[i] += in->literal_[i];
232  }
233  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
234    out->distance_[i] += in->distance_[i];
235  }
236  for (i = 0; i < 256; ++i) {
237    out->red_[i] += in->red_[i];
238    out->blue_[i] += in->blue_[i];
239    out->alpha_[i] += in->alpha_[i];
240  }
241}
242
243// Performs out = a + b, computing the cost C(a+b) - C(a) - C(b) while comparing
244// to the threshold value 'cost_threshold'. The score returned is
245//  Score = C(a+b) - C(a) - C(b), where C(a) + C(b) is known and fixed.
246// Since the previous score passed is 'cost_threshold', we only need to compare
247// the partial cost against 'cost_threshold + C(a) + C(b)' to possibly bail-out
248// early.
249static double HistogramAddEval(const VP8LHistogram* const a,
250                               const VP8LHistogram* const b,
251                               VP8LHistogram* const out,
252                               double cost_threshold) {
253  double cost = 0;
254  const double sum_cost = a->bit_cost_ + b->bit_cost_;
255  int i;
256
257  cost_threshold += sum_cost;
258
259  // palette_code_bits_ is part of the cost evaluation for literal_.
260  // TODO(skal): remove/simplify this palette_code_bits_?
261  out->palette_code_bits_ =
262      (a->palette_code_bits_ > b->palette_code_bits_) ? a->palette_code_bits_ :
263                                                        b->palette_code_bits_;
264  for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
265    out->literal_[i] = a->literal_[i] + b->literal_[i];
266  }
267  cost += PopulationCost(out->literal_, VP8LHistogramNumCodes(out));
268  cost += ExtraCost(out->literal_ + 256, NUM_LENGTH_CODES);
269  if (cost > cost_threshold) return cost;
270
271  for (i = 0; i < 256; ++i) out->red_[i] = a->red_[i] + b->red_[i];
272  cost += PopulationCost(out->red_, 256);
273  if (cost > cost_threshold) return cost;
274
275  for (i = 0; i < 256; ++i) out->blue_[i] = a->blue_[i] + b->blue_[i];
276  cost += PopulationCost(out->blue_, 256);
277  if (cost > cost_threshold) return cost;
278
279  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
280    out->distance_[i] = a->distance_[i] + b->distance_[i];
281  }
282  cost += PopulationCost(out->distance_, NUM_DISTANCE_CODES);
283  cost += ExtraCost(out->distance_, NUM_DISTANCE_CODES);
284  if (cost > cost_threshold) return cost;
285
286  for (i = 0; i < 256; ++i) out->alpha_[i] = a->alpha_[i] + b->alpha_[i];
287  cost += PopulationCost(out->alpha_, 256);
288
289  out->bit_cost_ = cost;
290  return cost - sum_cost;
291}
292
293// Same as HistogramAddEval(), except that the resulting histogram
294// is not stored. Only the cost C(a+b) - C(a) is evaluated. We omit
295// the term C(b) which is constant over all the evaluations.
296static double HistogramAddThresh(const VP8LHistogram* const a,
297                                 const VP8LHistogram* const b,
298                                 double cost_threshold) {
299  int tmp[PIX_OR_COPY_CODES_MAX];  // <= max storage we'll need
300  int i;
301  double cost = -a->bit_cost_;
302
303  for (i = 0; i < PIX_OR_COPY_CODES_MAX; ++i) {
304    tmp[i] = a->literal_[i] + b->literal_[i];
305  }
306  // note that the tests are ordered so that the usually largest
307  // cost shares come first.
308  cost += PopulationCost(tmp, VP8LHistogramNumCodes(a));
309  cost += ExtraCost(tmp + 256, NUM_LENGTH_CODES);
310  if (cost > cost_threshold) return cost;
311
312  for (i = 0; i < 256; ++i) tmp[i] = a->red_[i] + b->red_[i];
313  cost += PopulationCost(tmp, 256);
314  if (cost > cost_threshold) return cost;
315
316  for (i = 0; i < 256; ++i) tmp[i] = a->blue_[i] + b->blue_[i];
317  cost += PopulationCost(tmp, 256);
318  if (cost > cost_threshold) return cost;
319
320  for (i = 0; i < NUM_DISTANCE_CODES; ++i) {
321    tmp[i] = a->distance_[i] + b->distance_[i];
322  }
323  cost += PopulationCost(tmp, NUM_DISTANCE_CODES);
324  cost += ExtraCost(tmp, NUM_DISTANCE_CODES);
325  if (cost > cost_threshold) return cost;
326
327  for (i = 0; i < 256; ++i) tmp[i] = a->alpha_[i] + b->alpha_[i];
328  cost += PopulationCost(tmp, 256);
329
330  return cost;
331}
332
333// -----------------------------------------------------------------------------
334
335static void HistogramBuildImage(int xsize, int histo_bits,
336                                const VP8LBackwardRefs* const backward_refs,
337                                VP8LHistogramSet* const image) {
338  int i;
339  int x = 0, y = 0;
340  const int histo_xsize = VP8LSubSampleSize(xsize, histo_bits);
341  VP8LHistogram** const histograms = image->histograms;
342  assert(histo_bits > 0);
343  for (i = 0; i < backward_refs->size; ++i) {
344    const PixOrCopy* const v = &backward_refs->refs[i];
345    const int ix = (y >> histo_bits) * histo_xsize + (x >> histo_bits);
346    VP8LHistogramAddSinglePixOrCopy(histograms[ix], v);
347    x += PixOrCopyLength(v);
348    while (x >= xsize) {
349      x -= xsize;
350      ++y;
351    }
352  }
353}
354
355static uint32_t MyRand(uint32_t *seed) {
356  *seed *= 16807U;
357  if (*seed == 0) {
358    *seed = 1;
359  }
360  return *seed;
361}
362
363static int HistogramCombine(const VP8LHistogramSet* const in,
364                            VP8LHistogramSet* const out, int iter_mult,
365                            int num_pairs, int num_tries_no_success) {
366  int ok = 0;
367  int i, iter;
368  uint32_t seed = 0;
369  int tries_with_no_success = 0;
370  int out_size = in->size;
371  const int outer_iters = in->size * iter_mult;
372  const int min_cluster_size = 2;
373  VP8LHistogram* const histos = (VP8LHistogram*)malloc(2 * sizeof(*histos));
374  VP8LHistogram* cur_combo = histos + 0;    // trial merged histogram
375  VP8LHistogram* best_combo = histos + 1;   // best merged histogram so far
376  if (histos == NULL) goto End;
377
378  // Copy histograms from in[] to out[].
379  assert(in->size <= out->size);
380  for (i = 0; i < in->size; ++i) {
381    in->histograms[i]->bit_cost_ = VP8LHistogramEstimateBits(in->histograms[i]);
382    *out->histograms[i] = *in->histograms[i];
383  }
384
385  // Collapse similar histograms in 'out'.
386  for (iter = 0; iter < outer_iters && out_size >= min_cluster_size; ++iter) {
387    double best_cost_diff = 0.;
388    int best_idx1 = -1, best_idx2 = 1;
389    int j;
390    const int num_tries = (num_pairs < out_size) ? num_pairs : out_size;
391    seed += iter;
392    for (j = 0; j < num_tries; ++j) {
393      double curr_cost_diff;
394      // Choose two histograms at random and try to combine them.
395      const uint32_t idx1 = MyRand(&seed) % out_size;
396      const uint32_t tmp = (j & 7) + 1;
397      const uint32_t diff = (tmp < 3) ? tmp : MyRand(&seed) % (out_size - 1);
398      const uint32_t idx2 = (idx1 + diff + 1) % out_size;
399      if (idx1 == idx2) {
400        continue;
401      }
402      // Calculate cost reduction on combining.
403      curr_cost_diff = HistogramAddEval(out->histograms[idx1],
404                                        out->histograms[idx2],
405                                        cur_combo, best_cost_diff);
406      if (curr_cost_diff < best_cost_diff) {    // found a better pair?
407        {     // swap cur/best combo histograms
408          VP8LHistogram* const tmp_histo = cur_combo;
409          cur_combo = best_combo;
410          best_combo = tmp_histo;
411        }
412        best_cost_diff = curr_cost_diff;
413        best_idx1 = idx1;
414        best_idx2 = idx2;
415      }
416    }
417
418    if (best_idx1 >= 0) {
419      *out->histograms[best_idx1] = *best_combo;
420      // swap best_idx2 slot with last one (which is now unused)
421      --out_size;
422      if (best_idx2 != out_size) {
423        out->histograms[best_idx2] = out->histograms[out_size];
424        out->histograms[out_size] = NULL;   // just for sanity check.
425      }
426      tries_with_no_success = 0;
427    }
428    if (++tries_with_no_success >= num_tries_no_success) {
429      break;
430    }
431  }
432  out->size = out_size;
433  ok = 1;
434
435 End:
436  free(histos);
437  return ok;
438}
439
440// -----------------------------------------------------------------------------
441// Histogram refinement
442
443// What is the bit cost of moving square_histogram from cur_symbol to candidate.
444static double HistogramDistance(const VP8LHistogram* const square_histogram,
445                                const VP8LHistogram* const candidate,
446                                double cost_threshold) {
447  return HistogramAddThresh(candidate, square_histogram, cost_threshold);
448}
449
450// Find the best 'out' histogram for each of the 'in' histograms.
451// Note: we assume that out[]->bit_cost_ is already up-to-date.
452static void HistogramRemap(const VP8LHistogramSet* const in,
453                           const VP8LHistogramSet* const out,
454                           uint16_t* const symbols) {
455  int i;
456  for (i = 0; i < in->size; ++i) {
457    int best_out = 0;
458    double best_bits =
459        HistogramDistance(in->histograms[i], out->histograms[0], 1.e38);
460    int k;
461    for (k = 1; k < out->size; ++k) {
462      const double cur_bits =
463          HistogramDistance(in->histograms[i], out->histograms[k], best_bits);
464      if (cur_bits < best_bits) {
465        best_bits = cur_bits;
466        best_out = k;
467      }
468    }
469    symbols[i] = best_out;
470  }
471
472  // Recompute each out based on raw and symbols.
473  for (i = 0; i < out->size; ++i) {
474    HistogramClear(out->histograms[i]);
475  }
476  for (i = 0; i < in->size; ++i) {
477    HistogramAdd(in->histograms[i], out->histograms[symbols[i]]);
478  }
479}
480
481int VP8LGetHistoImageSymbols(int xsize, int ysize,
482                             const VP8LBackwardRefs* const refs,
483                             int quality, int histo_bits, int cache_bits,
484                             VP8LHistogramSet* const image_in,
485                             uint16_t* const histogram_symbols) {
486  int ok = 0;
487  const int histo_xsize = histo_bits ? VP8LSubSampleSize(xsize, histo_bits) : 1;
488  const int histo_ysize = histo_bits ? VP8LSubSampleSize(ysize, histo_bits) : 1;
489  const int histo_image_raw_size = histo_xsize * histo_ysize;
490
491  // Heuristic params for HistogramCombine().
492  const int num_tries_no_success = 8 + (quality >> 1);
493  const int iter_mult = (quality < 27) ? 1 : 1 + ((quality - 27) >> 4);
494  const int num_pairs = (quality < 25) ? 10 : (5 * quality) >> 3;
495
496  VP8LHistogramSet* const image_out =
497      VP8LAllocateHistogramSet(histo_image_raw_size, cache_bits);
498  if (image_out == NULL) return 0;
499
500  // Build histogram image.
501  HistogramBuildImage(xsize, histo_bits, refs, image_out);
502  // Collapse similar histograms.
503  if (!HistogramCombine(image_out, image_in, iter_mult, num_pairs,
504                        num_tries_no_success)) {
505    goto Error;
506  }
507  // Find the optimal map from original histograms to the final ones.
508  HistogramRemap(image_out, image_in, histogram_symbols);
509  ok = 1;
510
511Error:
512  free(image_out);
513  return ok;
514}
515