1/* Copyright 2013 Google Inc. All Rights Reserved.
2
3   Distributed under MIT license.
4   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
5*/
6
7/* Block split point selection utilities. */
8
9#include "./block_splitter.h"
10
11#include <assert.h>
12#include <string.h>  /* memcpy, memset */
13
14#include "./bit_cost.h"
15#include "./cluster.h"
16#include "./command.h"
17#include "./fast_log.h"
18#include "./histogram.h"
19#include "./memory.h"
20#include "./port.h"
21#include "./quality.h"
22
23#if defined(__cplusplus) || defined(c_plusplus)
24extern "C" {
25#endif
26
27static const size_t kMaxLiteralHistograms = 100;
28static const size_t kMaxCommandHistograms = 50;
29static const double kLiteralBlockSwitchCost = 28.1;
30static const double kCommandBlockSwitchCost = 13.5;
31static const double kDistanceBlockSwitchCost = 14.6;
32static const size_t kLiteralStrideLength = 70;
33static const size_t kCommandStrideLength = 40;
34static const size_t kSymbolsPerLiteralHistogram = 544;
35static const size_t kSymbolsPerCommandHistogram = 530;
36static const size_t kSymbolsPerDistanceHistogram = 544;
37static const size_t kMinLengthForBlockSplitting = 128;
38static const size_t kIterMulForRefining = 2;
39static const size_t kMinItersForRefining = 100;
40
41static size_t CountLiterals(const Command* cmds, const size_t num_commands) {
42  /* Count how many we have. */
43  size_t total_length = 0;
44  size_t i;
45  for (i = 0; i < num_commands; ++i) {
46    total_length += cmds[i].insert_len_;
47  }
48  return total_length;
49}
50
51static void CopyLiteralsToByteArray(const Command* cmds,
52                                    const size_t num_commands,
53                                    const uint8_t* data,
54                                    const size_t offset,
55                                    const size_t mask,
56                                    uint8_t* literals) {
57  size_t pos = 0;
58  size_t from_pos = offset & mask;
59  size_t i;
60  for (i = 0; i < num_commands; ++i) {
61    size_t insert_len = cmds[i].insert_len_;
62    if (from_pos + insert_len > mask) {
63      size_t head_size = mask + 1 - from_pos;
64      memcpy(literals + pos, data + from_pos, head_size);
65      from_pos = 0;
66      pos += head_size;
67      insert_len -= head_size;
68    }
69    if (insert_len > 0) {
70      memcpy(literals + pos, data + from_pos, insert_len);
71      pos += insert_len;
72    }
73    from_pos = (from_pos + insert_len + CommandCopyLen(&cmds[i])) & mask;
74  }
75}
76
77static BROTLI_INLINE uint32_t MyRand(uint32_t* seed) {
78  /* Initial seed should be 7. In this case, loop length is (1 << 29). */
79  *seed *= 16807U;
80  return *seed;
81}
82
83static BROTLI_INLINE double BitCost(size_t count) {
84  return count == 0 ? -2.0 : FastLog2(count);
85}
86
87#define HISTOGRAMS_PER_BATCH 64
88#define CLUSTERS_PER_BATCH 16
89
90#define FN(X) X ## Literal
91#define DataType uint8_t
92/* NOLINTNEXTLINE(build/include) */
93#include "./block_splitter_inc.h"
94#undef DataType
95#undef FN
96
97#define FN(X) X ## Command
98#define DataType uint16_t
99/* NOLINTNEXTLINE(build/include) */
100#include "./block_splitter_inc.h"
101#undef FN
102
103#define FN(X) X ## Distance
104/* NOLINTNEXTLINE(build/include) */
105#include "./block_splitter_inc.h"
106#undef DataType
107#undef FN
108
109void BrotliInitBlockSplit(BlockSplit* self) {
110  self->num_types = 0;
111  self->num_blocks = 0;
112  self->types = 0;
113  self->lengths = 0;
114  self->types_alloc_size = 0;
115  self->lengths_alloc_size = 0;
116}
117
118void BrotliDestroyBlockSplit(MemoryManager* m, BlockSplit* self) {
119  BROTLI_FREE(m, self->types);
120  BROTLI_FREE(m, self->lengths);
121}
122
123void BrotliSplitBlock(MemoryManager* m,
124                      const Command* cmds,
125                      const size_t num_commands,
126                      const uint8_t* data,
127                      const size_t pos,
128                      const size_t mask,
129                      const BrotliEncoderParams* params,
130                      BlockSplit* literal_split,
131                      BlockSplit* insert_and_copy_split,
132                      BlockSplit* dist_split) {
133  {
134    size_t literals_count = CountLiterals(cmds, num_commands);
135    uint8_t* literals = BROTLI_ALLOC(m, uint8_t, literals_count);
136    if (BROTLI_IS_OOM(m)) return;
137    /* Create a continuous array of literals. */
138    CopyLiteralsToByteArray(cmds, num_commands, data, pos, mask, literals);
139    /* Create the block split on the array of literals.
140       Literal histograms have alphabet size 256. */
141    SplitByteVectorLiteral(
142        m, literals, literals_count,
143        kSymbolsPerLiteralHistogram, kMaxLiteralHistograms,
144        kLiteralStrideLength, kLiteralBlockSwitchCost, params,
145        literal_split);
146    if (BROTLI_IS_OOM(m)) return;
147    BROTLI_FREE(m, literals);
148  }
149
150  {
151    /* Compute prefix codes for commands. */
152    uint16_t* insert_and_copy_codes = BROTLI_ALLOC(m, uint16_t, num_commands);
153    size_t i;
154    if (BROTLI_IS_OOM(m)) return;
155    for (i = 0; i < num_commands; ++i) {
156      insert_and_copy_codes[i] = cmds[i].cmd_prefix_;
157    }
158    /* Create the block split on the array of command prefixes. */
159    SplitByteVectorCommand(
160        m, insert_and_copy_codes, num_commands,
161        kSymbolsPerCommandHistogram, kMaxCommandHistograms,
162        kCommandStrideLength, kCommandBlockSwitchCost, params,
163        insert_and_copy_split);
164    if (BROTLI_IS_OOM(m)) return;
165    /* TODO: reuse for distances? */
166    BROTLI_FREE(m, insert_and_copy_codes);
167  }
168
169  {
170    /* Create a continuous array of distance prefixes. */
171    uint16_t* distance_prefixes = BROTLI_ALLOC(m, uint16_t, num_commands);
172    size_t j = 0;
173    size_t i;
174    if (BROTLI_IS_OOM(m)) return;
175    for (i = 0; i < num_commands; ++i) {
176      const Command* cmd = &cmds[i];
177      if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) {
178        distance_prefixes[j++] = cmd->dist_prefix_;
179      }
180    }
181    /* Create the block split on the array of distance prefixes. */
182    SplitByteVectorDistance(
183        m, distance_prefixes, j,
184        kSymbolsPerDistanceHistogram, kMaxCommandHistograms,
185        kCommandStrideLength, kDistanceBlockSwitchCost, params,
186        dist_split);
187    if (BROTLI_IS_OOM(m)) return;
188    BROTLI_FREE(m, distance_prefixes);
189  }
190}
191
192
193#if defined(__cplusplus) || defined(c_plusplus)
194}  /* extern "C" */
195#endif
196