1/* Copyright 2015 Google Inc. All Rights Reserved.
2
3   Distributed under MIT license.
4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7/* Algorithms for distributing the literals and commands of a metablock between
8   block types and contexts. */
9
10#include "./metablock.h"
11
12#include "../common/constants.h"
13#include <brotli/types.h>
14#include "./bit_cost.h"
15#include "./block_splitter.h"
16#include "./cluster.h"
17#include "./context.h"
18#include "./entropy_encode.h"
19#include "./histogram.h"
20#include "./memory.h"
21#include "./port.h"
22#include "./quality.h"
23
24#if defined(__cplusplus) || defined(c_plusplus)
25extern "C" {
26#endif
27
28void BrotliBuildMetaBlock(MemoryManager* m,
29                          const uint8_t* ringbuffer,
30                          const size_t pos,
31                          const size_t mask,
32                          const BrotliEncoderParams* params,
33                          uint8_t prev_byte,
34                          uint8_t prev_byte2,
35                          const Command* cmds,
36                          size_t num_commands,
37                          ContextType literal_context_mode,
38                          MetaBlockSplit* mb) {
39  /* Histogram ids need to fit in one byte. */
40  static const size_t kMaxNumberOfHistograms = 256;
41  HistogramDistance* distance_histograms;
42  HistogramLiteral* literal_histograms;
43  ContextType* literal_context_modes = NULL;
44  size_t literal_histograms_size;
45  size_t distance_histograms_size;
46  size_t i;
47  size_t literal_context_multiplier = 1;
48
49  BrotliSplitBlock(m, cmds, num_commands,
50                   ringbuffer, pos, mask, params,
51                   &mb->literal_split,
52                   &mb->command_split,
53                   &mb->distance_split);
54  if (BROTLI_IS_OOM(m)) return;
55
56  if (!params->disable_literal_context_modeling) {
57    literal_context_multiplier = 1 << BROTLI_LITERAL_CONTEXT_BITS;
58    literal_context_modes =
59        BROTLI_ALLOC(m, ContextType, mb->literal_split.num_types);
60    if (BROTLI_IS_OOM(m)) return;
61    for (i = 0; i < mb->literal_split.num_types; ++i) {
62      literal_context_modes[i] = literal_context_mode;
63    }
64  }
65
66  literal_histograms_size =
67      mb->literal_split.num_types * literal_context_multiplier;
68  literal_histograms =
69      BROTLI_ALLOC(m, HistogramLiteral, literal_histograms_size);
70  if (BROTLI_IS_OOM(m)) return;
71  ClearHistogramsLiteral(literal_histograms, literal_histograms_size);
72
73  distance_histograms_size =
74      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
75  distance_histograms =
76      BROTLI_ALLOC(m, HistogramDistance, distance_histograms_size);
77  if (BROTLI_IS_OOM(m)) return;
78  ClearHistogramsDistance(distance_histograms, distance_histograms_size);
79
80  assert(mb->command_histograms == 0);
81  mb->command_histograms_size = mb->command_split.num_types;
82  mb->command_histograms =
83      BROTLI_ALLOC(m, HistogramCommand, mb->command_histograms_size);
84  if (BROTLI_IS_OOM(m)) return;
85  ClearHistogramsCommand(mb->command_histograms, mb->command_histograms_size);
86
87  BrotliBuildHistogramsWithContext(cmds, num_commands,
88      &mb->literal_split, &mb->command_split, &mb->distance_split,
89      ringbuffer, pos, mask, prev_byte, prev_byte2, literal_context_modes,
90      literal_histograms, mb->command_histograms, distance_histograms);
91  BROTLI_FREE(m, literal_context_modes);
92
93  assert(mb->literal_context_map == 0);
94  mb->literal_context_map_size =
95      mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
96  mb->literal_context_map =
97      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
98  if (BROTLI_IS_OOM(m)) return;
99
100  assert(mb->literal_histograms == 0);
101  mb->literal_histograms_size = mb->literal_context_map_size;
102  mb->literal_histograms =
103      BROTLI_ALLOC(m, HistogramLiteral, mb->literal_histograms_size);
104  if (BROTLI_IS_OOM(m)) return;
105
106  BrotliClusterHistogramsLiteral(m, literal_histograms, literal_histograms_size,
107      kMaxNumberOfHistograms, mb->literal_histograms,
108      &mb->literal_histograms_size, mb->literal_context_map);
109  if (BROTLI_IS_OOM(m)) return;
110  BROTLI_FREE(m, literal_histograms);
111
112  if (params->disable_literal_context_modeling) {
113    /* Distribute assignment to all contexts. */
114    for (i = mb->literal_split.num_types; i != 0;) {
115      size_t j = 0;
116      i--;
117      for (; j < (1 << BROTLI_LITERAL_CONTEXT_BITS); j++) {
118        mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
119            mb->literal_context_map[i];
120      }
121    }
122  }
123
124  assert(mb->distance_context_map == 0);
125  mb->distance_context_map_size =
126      mb->distance_split.num_types << BROTLI_DISTANCE_CONTEXT_BITS;
127  mb->distance_context_map =
128      BROTLI_ALLOC(m, uint32_t, mb->distance_context_map_size);
129  if (BROTLI_IS_OOM(m)) return;
130
131  assert(mb->distance_histograms == 0);
132  mb->distance_histograms_size = mb->distance_context_map_size;
133  mb->distance_histograms =
134      BROTLI_ALLOC(m, HistogramDistance, mb->distance_histograms_size);
135  if (BROTLI_IS_OOM(m)) return;
136
137  BrotliClusterHistogramsDistance(m, distance_histograms,
138                                  mb->distance_context_map_size,
139                                  kMaxNumberOfHistograms,
140                                  mb->distance_histograms,
141                                  &mb->distance_histograms_size,
142                                  mb->distance_context_map);
143  if (BROTLI_IS_OOM(m)) return;
144  BROTLI_FREE(m, distance_histograms);
145}
146
147#define FN(X) X ## Literal
148#include "./metablock_inc.h"  /* NOLINT(build/include) */
149#undef FN
150
151#define FN(X) X ## Command
152#include "./metablock_inc.h"  /* NOLINT(build/include) */
153#undef FN
154
155#define FN(X) X ## Distance
156#include "./metablock_inc.h"  /* NOLINT(build/include) */
157#undef FN
158
159#define BROTLI_MAX_STATIC_CONTEXTS 13
160
161/* Greedy block splitter for one block category (literal, command or distance).
162   Gathers histograms for all context buckets. */
163typedef struct ContextBlockSplitter {
164  /* Alphabet size of particular block category. */
165  size_t alphabet_size_;
166  size_t num_contexts_;
167  size_t max_block_types_;
168  /* We collect at least this many symbols for each block. */
169  size_t min_block_size_;
170  /* We merge histograms A and B if
171       entropy(A+B) < entropy(A) + entropy(B) + split_threshold_,
172     where A is the current histogram and B is the histogram of the last or the
173     second last block type. */
174  double split_threshold_;
175
176  size_t num_blocks_;
177  BlockSplit* split_;  /* not owned */
178  HistogramLiteral* histograms_;  /* not owned */
179  size_t* histograms_size_;  /* not owned */
180
181  /* The number of symbols that we want to collect before deciding on whether
182     or not to merge the block with a previous one or emit a new block. */
183  size_t target_block_size_;
184  /* The number of symbols in the current histogram. */
185  size_t block_size_;
186  /* Offset of the current histogram. */
187  size_t curr_histogram_ix_;
188  /* Offset of the histograms of the previous two block types. */
189  size_t last_histogram_ix_[2];
190  /* Entropy of the previous two block types. */
191  double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS];
192  /* The number of times we merged the current block with the last one. */
193  size_t merge_last_count_;
194} ContextBlockSplitter;
195
196static void InitContextBlockSplitter(
197    MemoryManager* m, ContextBlockSplitter* self, size_t alphabet_size,
198    size_t num_contexts, size_t min_block_size, double split_threshold,
199    size_t num_symbols, BlockSplit* split, HistogramLiteral** histograms,
200    size_t* histograms_size) {
201  size_t max_num_blocks = num_symbols / min_block_size + 1;
202  size_t max_num_types;
203  assert(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS);
204
205  self->alphabet_size_ = alphabet_size;
206  self->num_contexts_ = num_contexts;
207  self->max_block_types_ = BROTLI_MAX_NUMBER_OF_BLOCK_TYPES / num_contexts;
208  self->min_block_size_ = min_block_size;
209  self->split_threshold_ = split_threshold;
210  self->num_blocks_ = 0;
211  self->split_ = split;
212  self->histograms_size_ = histograms_size;
213  self->target_block_size_ = min_block_size;
214  self->block_size_ = 0;
215  self->curr_histogram_ix_ = 0;
216  self->merge_last_count_ = 0;
217
218  /* We have to allocate one more histogram than the maximum number of block
219     types for the current histogram when the meta-block is too big. */
220  max_num_types =
221      BROTLI_MIN(size_t, max_num_blocks, self->max_block_types_ + 1);
222  BROTLI_ENSURE_CAPACITY(m, uint8_t,
223      split->types, split->types_alloc_size, max_num_blocks);
224  BROTLI_ENSURE_CAPACITY(m, uint32_t,
225      split->lengths, split->lengths_alloc_size, max_num_blocks);
226  if (BROTLI_IS_OOM(m)) return;
227  split->num_blocks = max_num_blocks;
228  if (BROTLI_IS_OOM(m)) return;
229  assert(*histograms == 0);
230  *histograms_size = max_num_types * num_contexts;
231  *histograms = BROTLI_ALLOC(m, HistogramLiteral, *histograms_size);
232  self->histograms_ = *histograms;
233  if (BROTLI_IS_OOM(m)) return;
234  /* Clear only current histogram. */
235  ClearHistogramsLiteral(&self->histograms_[0], num_contexts);
236  self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0;
237}
238
239/* Does either of three things:
240     (1) emits the current block with a new block type;
241     (2) emits the current block with the type of the second last block;
242     (3) merges the current block with the last block. */
243static void ContextBlockSplitterFinishBlock(
244    ContextBlockSplitter* self, MemoryManager* m, BROTLI_BOOL is_final) {
245  BlockSplit* split = self->split_;
246  const size_t num_contexts = self->num_contexts_;
247  double* last_entropy = self->last_entropy_;
248  HistogramLiteral* histograms = self->histograms_;
249
250  if (self->block_size_ < self->min_block_size_) {
251    self->block_size_ = self->min_block_size_;
252  }
253  if (self->num_blocks_ == 0) {
254    size_t i;
255    /* Create first block. */
256    split->lengths[0] = (uint32_t)self->block_size_;
257    split->types[0] = 0;
258
259    for (i = 0; i < num_contexts; ++i) {
260      last_entropy[i] =
261          BitsEntropy(histograms[i].data_, self->alphabet_size_);
262      last_entropy[num_contexts + i] = last_entropy[i];
263    }
264    ++self->num_blocks_;
265    ++split->num_types;
266    self->curr_histogram_ix_ += num_contexts;
267    if (self->curr_histogram_ix_ < *self->histograms_size_) {
268      ClearHistogramsLiteral(
269          &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
270    }
271    self->block_size_ = 0;
272  } else if (self->block_size_ > 0) {
273    /* Try merging the set of histograms for the current block type with the
274       respective set of histograms for the last and second last block types.
275       Decide over the split based on the total reduction of entropy across
276       all contexts. */
277    double entropy[BROTLI_MAX_STATIC_CONTEXTS];
278    HistogramLiteral* combined_histo =
279        BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts);
280    double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS];
281    double diff[2] = { 0.0 };
282    size_t i;
283    if (BROTLI_IS_OOM(m)) return;
284    for (i = 0; i < num_contexts; ++i) {
285      size_t curr_histo_ix = self->curr_histogram_ix_ + i;
286      size_t j;
287      entropy[i] = BitsEntropy(histograms[curr_histo_ix].data_,
288                               self->alphabet_size_);
289      for (j = 0; j < 2; ++j) {
290        size_t jx = j * num_contexts + i;
291        size_t last_histogram_ix = self->last_histogram_ix_[j] + i;
292        combined_histo[jx] = histograms[curr_histo_ix];
293        HistogramAddHistogramLiteral(&combined_histo[jx],
294            &histograms[last_histogram_ix]);
295        combined_entropy[jx] = BitsEntropy(
296            &combined_histo[jx].data_[0], self->alphabet_size_);
297        diff[j] += combined_entropy[jx] - entropy[i] - last_entropy[jx];
298      }
299    }
300
301    if (split->num_types < self->max_block_types_ &&
302        diff[0] > self->split_threshold_ &&
303        diff[1] > self->split_threshold_) {
304      /* Create new block. */
305      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
306      split->types[self->num_blocks_] = (uint8_t)split->num_types;
307      self->last_histogram_ix_[1] = self->last_histogram_ix_[0];
308      self->last_histogram_ix_[0] = split->num_types * num_contexts;
309      for (i = 0; i < num_contexts; ++i) {
310        last_entropy[num_contexts + i] = last_entropy[i];
311        last_entropy[i] = entropy[i];
312      }
313      ++self->num_blocks_;
314      ++split->num_types;
315      self->curr_histogram_ix_ += num_contexts;
316      if (self->curr_histogram_ix_ < *self->histograms_size_) {
317        ClearHistogramsLiteral(
318            &self->histograms_[self->curr_histogram_ix_], self->num_contexts_);
319      }
320      self->block_size_ = 0;
321      self->merge_last_count_ = 0;
322      self->target_block_size_ = self->min_block_size_;
323    } else if (diff[1] < diff[0] - 20.0) {
324      /* Combine this block with second last block. */
325      split->lengths[self->num_blocks_] = (uint32_t)self->block_size_;
326      split->types[self->num_blocks_] = split->types[self->num_blocks_ - 2];
327      BROTLI_SWAP(size_t, self->last_histogram_ix_, 0, 1);
328      for (i = 0; i < num_contexts; ++i) {
329        histograms[self->last_histogram_ix_[0] + i] =
330            combined_histo[num_contexts + i];
331        last_entropy[num_contexts + i] = last_entropy[i];
332        last_entropy[i] = combined_entropy[num_contexts + i];
333        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
334      }
335      ++self->num_blocks_;
336      self->block_size_ = 0;
337      self->merge_last_count_ = 0;
338      self->target_block_size_ = self->min_block_size_;
339    } else {
340      /* Combine this block with last block. */
341      split->lengths[self->num_blocks_ - 1] += (uint32_t)self->block_size_;
342      for (i = 0; i < num_contexts; ++i) {
343        histograms[self->last_histogram_ix_[0] + i] = combined_histo[i];
344        last_entropy[i] = combined_entropy[i];
345        if (split->num_types == 1) {
346          last_entropy[num_contexts + i] = last_entropy[i];
347        }
348        HistogramClearLiteral(&histograms[self->curr_histogram_ix_ + i]);
349      }
350      self->block_size_ = 0;
351      if (++self->merge_last_count_ > 1) {
352        self->target_block_size_ += self->min_block_size_;
353      }
354    }
355    BROTLI_FREE(m, combined_histo);
356  }
357  if (is_final) {
358    *self->histograms_size_ = split->num_types * num_contexts;
359    split->num_blocks = self->num_blocks_;
360  }
361}
362
363/* Adds the next symbol to the current block type and context. When the
364   current block reaches the target size, decides on merging the block. */
365static void ContextBlockSplitterAddSymbol(
366    ContextBlockSplitter* self, MemoryManager* m,
367    size_t symbol, size_t context) {
368  HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context],
369      symbol);
370  ++self->block_size_;
371  if (self->block_size_ == self->target_block_size_) {
372    ContextBlockSplitterFinishBlock(self, m, /* is_final = */ BROTLI_FALSE);
373    if (BROTLI_IS_OOM(m)) return;
374  }
375}
376
377static void MapStaticContexts(MemoryManager* m,
378                              size_t num_contexts,
379                              const uint32_t* static_context_map,
380                              MetaBlockSplit* mb) {
381  size_t i;
382  assert(mb->literal_context_map == 0);
383  mb->literal_context_map_size =
384      mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS;
385  mb->literal_context_map =
386      BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size);
387  if (BROTLI_IS_OOM(m)) return;
388
389  for (i = 0; i < mb->literal_split.num_types; ++i) {
390    uint32_t offset = (uint32_t)(i * num_contexts);
391    size_t j;
392    for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) {
393      mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] =
394          offset + static_context_map[j];
395    }
396  }
397}
398
399static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal(
400    MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask,
401    uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode,
402    const size_t num_contexts, const uint32_t* static_context_map,
403    const Command *commands, size_t n_commands, MetaBlockSplit* mb) {
404  union {
405    BlockSplitterLiteral plain;
406    ContextBlockSplitter ctx;
407  } lit_blocks;
408  BlockSplitterCommand cmd_blocks;
409  BlockSplitterDistance dist_blocks;
410  size_t num_literals = 0;
411  size_t i;
412  for (i = 0; i < n_commands; ++i) {
413    num_literals += commands[i].insert_len_;
414  }
415
416  if (num_contexts == 1) {
417    InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0,
418        num_literals, &mb->literal_split, &mb->literal_histograms,
419        &mb->literal_histograms_size);
420  } else {
421    InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0,
422        num_literals, &mb->literal_split, &mb->literal_histograms,
423        &mb->literal_histograms_size);
424  }
425  if (BROTLI_IS_OOM(m)) return;
426  InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024,
427      500.0, n_commands, &mb->command_split, &mb->command_histograms,
428      &mb->command_histograms_size);
429  if (BROTLI_IS_OOM(m)) return;
430  InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands,
431      &mb->distance_split, &mb->distance_histograms,
432      &mb->distance_histograms_size);
433  if (BROTLI_IS_OOM(m)) return;
434
435  for (i = 0; i < n_commands; ++i) {
436    const Command cmd = commands[i];
437    size_t j;
438    BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_);
439    for (j = cmd.insert_len_; j != 0; --j) {
440      uint8_t literal = ringbuffer[pos & mask];
441      if (num_contexts == 1) {
442        BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal);
443      } else {
444        size_t context = Context(prev_byte, prev_byte2, literal_context_mode);
445        ContextBlockSplitterAddSymbol(&lit_blocks.ctx, m, literal,
446                                      static_context_map[context]);
447        if (BROTLI_IS_OOM(m)) return;
448      }
449      prev_byte2 = prev_byte;
450      prev_byte = literal;
451      ++pos;
452    }
453    pos += CommandCopyLen(&cmd);
454    if (CommandCopyLen(&cmd)) {
455      prev_byte2 = ringbuffer[(pos - 2) & mask];
456      prev_byte = ringbuffer[(pos - 1) & mask];
457      if (cmd.cmd_prefix_ >= 128) {
458        BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_);
459      }
460    }
461  }
462
463  if (num_contexts == 1) {
464    BlockSplitterFinishBlockLiteral(
465        &lit_blocks.plain, /* is_final = */ BROTLI_TRUE);
466  } else {
467    ContextBlockSplitterFinishBlock(
468        &lit_blocks.ctx, m, /* is_final = */ BROTLI_TRUE);
469    if (BROTLI_IS_OOM(m)) return;
470  }
471  BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE);
472  BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE);
473
474  if (num_contexts > 1) {
475    MapStaticContexts(m, num_contexts, static_context_map, mb);
476  }
477}
478
479void BrotliBuildMetaBlockGreedy(MemoryManager* m,
480                                const uint8_t* ringbuffer,
481                                size_t pos,
482                                size_t mask,
483                                uint8_t prev_byte,
484                                uint8_t prev_byte2,
485                                ContextType literal_context_mode,
486                                size_t num_contexts,
487                                const uint32_t* static_context_map,
488                                const Command* commands,
489                                size_t n_commands,
490                                MetaBlockSplit* mb) {
491  if (num_contexts == 1) {
492    BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
493        prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb);
494  } else {
495    BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte,
496        prev_byte2, literal_context_mode, num_contexts, static_context_map,
497        commands, n_commands, mb);
498  }
499}
500
501void BrotliOptimizeHistograms(size_t num_direct_distance_codes,
502                              size_t distance_postfix_bits,
503                              MetaBlockSplit* mb) {
504  uint8_t good_for_rle[BROTLI_NUM_COMMAND_SYMBOLS];
505  size_t num_distance_codes;
506  size_t i;
507  for (i = 0; i < mb->literal_histograms_size; ++i) {
508    BrotliOptimizeHuffmanCountsForRle(256, mb->literal_histograms[i].data_,
509                                      good_for_rle);
510  }
511  for (i = 0; i < mb->command_histograms_size; ++i) {
512    BrotliOptimizeHuffmanCountsForRle(BROTLI_NUM_COMMAND_SYMBOLS,
513                                      mb->command_histograms[i].data_,
514                                      good_for_rle);
515  }
516  num_distance_codes = BROTLI_NUM_DISTANCE_SHORT_CODES +
517      num_direct_distance_codes +
518      ((2 * BROTLI_MAX_DISTANCE_BITS) << distance_postfix_bits);
519  for (i = 0; i < mb->distance_histograms_size; ++i) {
520    BrotliOptimizeHuffmanCountsForRle(num_distance_codes,
521                                      mb->distance_histograms[i].data_,
522                                      good_for_rle);
523  }
524}
525
526#if defined(__cplusplus) || defined(c_plusplus)
527}  /* extern "C" */
528#endif
529