1/* Copyright 2013 Google Inc. All Rights Reserved.
2
3   Distributed under MIT license.
4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7/* Block split point selection utilities. */
8
9#include "./block_splitter.h"
10
11#include <assert.h>
12#include <string.h>  /* memcpy, memset */
13
14#include "./bit_cost.h"
15#include "./cluster.h"
16#include "./command.h"
17#include "./fast_log.h"
18#include "./histogram.h"
19#include "./memory.h"
20#include "./port.h"
21#include "./quality.h"
22
23#if defined(__cplusplus) || defined(c_plusplus)
24extern "C" {
25#endif
26
/* Tuning constants for block splitting, grouped by symbol category.  The
   per-category values are handed to the SplitByteVector* routines by
   BrotliSplitBlock. */

/* Literal stream. */
static const size_t kSymbolsPerLiteralHistogram = 544;
static const size_t kMaxLiteralHistograms = 100;
static const size_t kLiteralStrideLength = 70;
static const double kLiteralBlockSwitchCost = 28.1;

/* Insert-and-copy command stream.  BrotliSplitBlock also passes
   kMaxCommandHistograms and kCommandStrideLength for the distance stream. */
static const size_t kSymbolsPerCommandHistogram = 530;
static const size_t kMaxCommandHistograms = 50;
static const size_t kCommandStrideLength = 40;
static const double kCommandBlockSwitchCost = 13.5;

/* Distance stream. */
static const size_t kSymbolsPerDistanceHistogram = 544;
static const double kDistanceBlockSwitchCost = 14.6;

/* No function written out in this file reads the next three; presumably they
   are consumed by the code expanded from block_splitter_inc.h -- TODO
   confirm there. */
static const size_t kMinLengthForBlockSplitting = 128;
static const size_t kIterMulForRefining = 2;
static const size_t kMinItersForRefining = 100;
40
41static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
42  /* Count how many we have. */
43  size_t total_length = 0;
44  size_t i;
45  for (i = 0; i < num_commands; ++i) {
46    total_length += cmds[i].insert_len_;
47  }
48  return total_length;
49}
50
51static void CopyLiteralsToByteArray(const Command* cmds,
52                                    const size_t num_commands,
53                                    const uint8_t* data,
54                                    const size_t offset,
55                                    const size_t mask,
56                                    uint8_t* literals) {
57  size_t pos = 0;
58  size_t from_pos = offset & mask;
59  size_t i;
60  for (i = 0; i < num_commands; ++i) {
61    size_t insert_len = cmds[i].insert_len_;
62    if (from_pos + insert_len > mask) {
63      size_t head_size = mask + 1 - from_pos;
64      memcpy(literals + pos, data + from_pos, head_size);
65      from_pos = 0;
66      pos += head_size;
67      insert_len -= head_size;
68    }
69    if (insert_len > 0) {
70      memcpy(literals + pos, data + from_pos, insert_len);
71      pos += insert_len;
72    }
73    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
74  }
75}
76
77static BROTLI_INLINE unsigned int MyRand(unsigned int* seed) {
78  *seed *= 16807U;
79  if (*seed == 0) {
80    *seed = 1;
81  }
82  return *seed;
83}
84
85static BROTLI_INLINE double BitCost(size_t count) {
86  return count == 0 ? -2.0 : FastLog2(count);
87}
88
89#define HISTOGRAMS_PER_BATCH 64
90#define CLUSTERS_PER_BATCH 16
91
92#define FN(X) X ## Literal
93#define DataType uint8_t
94/* NOLINTNEXTLINE(build/include) */
95#include "./block_splitter_inc.h"
96#undef DataType
97#undef FN
98
99#define FN(X) X ## Command
100#define DataType uint16_t
101/* NOLINTNEXTLINE(build/include) */
102#include "./block_splitter_inc.h"
103#undef FN
104
105#define FN(X) X ## Distance
106/* NOLINTNEXTLINE(build/include) */
107#include "./block_splitter_inc.h"
108#undef DataType
109#undef FN
110
111void BrotliInitBlockSplit(BlockSplit* self) {
112  self->num_types = 0;
113  self->num_blocks = 0;
114  self->types = 0;
115  self->lengths = 0;
116  self->types_alloc_size = 0;
117  self->lengths_alloc_size = 0;
118}
119
120void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
121  BROTLI_FREE(m, self->types);
122  BROTLI_FREE(m, self->lengths);
123}
124
125void BrotliSplitBlock(MemoryManager* m,
126                      const Command* cmds,
127                      const size_t num_commands,
128                      const uint8_t* data,
129                      const size_t pos,
130                      const size_t mask,
131                      const BrotliEncoderParams* params,
132                      BlockSplit* literal_split,
133                      BlockSplit* insert_and_copy_split,
134                      BlockSplit* dist_split) {
135  {
136    size_t literals_count = CountLiterals(cmds, num_commands);
137    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
138    if (BROTLI_IS_OOM(m)) return;
139    /* Create a continuous array of literals. */
140    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
141    /* Create the block split on the array of literals.
142       Literal histograms have alphabet size 256. */
143    SplitByteVectorLiteral(
144        m, literals, literals_count,
145        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
146        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
147        literal_split);
148    if (BROTLI_IS_OOM(m)) return;
149    BROTLI_FREE(m, literals);
150  }
151
152  {
153    /* Compute prefix codes for commands. */
154    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
155    size_t i;
156    if (BROTLI_IS_OOM(m)) return;
157    for (i = 0; i < num_commands; ++i) {
158      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
159    }
160    /* Create the block split on the array of command prefixes. */
161    SplitByteVectorCommand(
162        m, insert_and_copy_codes, num_commands,
163        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
164        kCommandStrideLength, kCommandBlockSwitchCost, params,
165        insert_and_copy_split);
166    if (BROTLI_IS_OOM(m)) return;
167    /* TODO: reuse for distances? */
168    BROTLI_FREE(m, insert_and_copy_codes);
169  }
170
171  {
172    /* Create a continuous array of distance prefixes. */
173    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
174    size_t j = 0;
175    size_t i;
176    if (BROTLI_IS_OOM(m)) return;
177    for (i = 0; i < num_commands; ++i) {
178      const Command* cmd = &cmds[i];
179      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
180        distance_prefixes[j++] = cmd->dist_prefix_;
181      }
182    }
183    /* Create the block split on the array of distance prefixes. */
184    SplitByteVectorDistance(
185        m, distance_prefixes, j,
186        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
187        kCommandStrideLength, kDistanceBlockSwitchCost, params,
188        dist_split);
189    if (BROTLI_IS_OOM(m)) return;
190    BROTLI_FREE(m, distance_prefixes);
191  }
192}
193
194
195#if defined(__cplusplus) || defined(c_plusplus)
196}  /* extern "C" */
197#endif
198