1/* Copyright 2013 Google Inc. All Rights Reserved. 2 3 Distributed under MIT license. 4 See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 5*/ 6 7/* Implementation of Brotli compressor. */ 8 9#include <brotli/encode.h> 10 11#include <stdlib.h> /* free, malloc */ 12#include <string.h> /* memcpy, memset */ 13 14#include "../common/version.h" 15#include "./backward_references.h" 16#include "./backward_references_hq.h" 17#include "./bit_cost.h" 18#include "./brotli_bit_stream.h" 19#include "./compress_fragment.h" 20#include "./compress_fragment_two_pass.h" 21#include "./context.h" 22#include "./entropy_encode.h" 23#include "./fast_log.h" 24#include "./hash.h" 25#include "./histogram.h" 26#include "./memory.h" 27#include "./metablock.h" 28#include "./port.h" 29#include "./prefix.h" 30#include "./quality.h" 31#include "./ringbuffer.h" 32#include "./utf8_util.h" 33#include "./write_bits.h" 34 35#if defined(__cplusplus) || defined(c_plusplus) 36extern "C" { 37#endif 38 39#define COPY_ARRAY(dst, src) memcpy(dst, src, sizeof(src)); 40 41typedef enum BrotliEncoderStreamState { 42 /* Default state. */ 43 BROTLI_STREAM_PROCESSING = 0, 44 /* Intermediate state; after next block is emitted, byte-padding should be 45 performed before getting back to default state. */ 46 BROTLI_STREAM_FLUSH_REQUESTED = 1, 47 /* Last metablock was produced; no more input is acceptable. */ 48 BROTLI_STREAM_FINISHED = 2, 49 /* Flushing compressed block and writing meta-data block header. */ 50 BROTLI_STREAM_METADATA_HEAD = 3, 51 /* Writing metadata block body. */ 52 BROTLI_STREAM_METADATA_BODY = 4 53} BrotliEncoderStreamState; 54 55typedef struct BrotliEncoderStateStruct { 56 BrotliEncoderParams params; 57 58 MemoryManager memory_manager_; 59 60 HasherHandle hasher_; 61 uint64_t input_pos_; 62 RingBuffer ringbuffer_; 63 size_t cmd_alloc_size_; 64 Command* commands_; 65 size_t num_commands_; 66 size_t num_literals_; 67 size_t last_insert_len_; 68 uint64_t last_flush_pos_; 69 uint64_t last_processed_pos_; 70 int dist_cache_[BROTLI_NUM_DISTANCE_SHORT_CODES]; 71 int saved_dist_cache_[4]; 72 uint8_t last_byte_; 73 uint8_t last_byte_bits_; 74 uint8_t prev_byte_; 75 uint8_t prev_byte2_; 76 size_t storage_size_; 77 uint8_t* storage_; 78 /* Hash table for FAST_ONE_PASS_COMPRESSION_QUALITY mode. */ 79 int small_table_[1 << 10]; /* 4KiB */ 80 int* large_table_; /* Allocated only when needed */ 81 size_t large_table_size_; 82 /* Command and distance prefix codes (each 64 symbols, stored back-to-back) 83 used for the next block in FAST_ONE_PASS_COMPRESSION_QUALITY. The command 84 prefix code is over a smaller alphabet with the following 64 symbols: 85 0 - 15: insert length code 0, copy length code 0 - 15, same distance 86 16 - 39: insert length code 0, copy length code 0 - 23 87 40 - 63: insert length code 0 - 23, copy length code 0 88 Note that symbols 16 and 40 represent the same code in the full alphabet, 89 but we do not use either of them in FAST_ONE_PASS_COMPRESSION_QUALITY. */ 90 uint8_t cmd_depths_[128]; 91 uint16_t cmd_bits_[128]; 92 /* The compressed form of the command and distance prefix codes for the next 93 block in FAST_ONE_PASS_COMPRESSION_QUALITY. */ 94 uint8_t cmd_code_[512]; 95 size_t cmd_code_numbits_; 96 /* Command and literal buffers for FAST_TWO_PASS_COMPRESSION_QUALITY. */ 97 uint32_t* command_buf_; 98 uint8_t* literal_buf_; 99 100 uint8_t* next_out_; 101 size_t available_out_; 102 size_t total_out_; 103 /* Temporary buffer for padding flush bits or metadata block header / body. */ 104 union { 105 uint64_t u64[2]; 106 uint8_t u8[16]; 107 } tiny_buf_; 108 uint32_t remaining_metadata_bytes_; 109 BrotliEncoderStreamState stream_state_; 110 111 BROTLI_BOOL is_last_block_emitted_; 112 BROTLI_BOOL is_initialized_; 113} BrotliEncoderStateStruct; 114 115static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s); 116 117static size_t InputBlockSize(BrotliEncoderState* s) { 118 if (!EnsureInitialized(s)) return 0; 119 return (size_t)1 << s->params.lgblock; 120} 121 122static uint64_t UnprocessedInputSize(BrotliEncoderState* s) { 123 return s->input_pos_ - s->last_processed_pos_; 124} 125 126static size_t RemainingInputBlockSize(BrotliEncoderState* s) { 127 const uint64_t delta = UnprocessedInputSize(s); 128 size_t block_size = InputBlockSize(s); 129 if (delta >= block_size) return 0; 130 return block_size - (size_t)delta; 131} 132 133BROTLI_BOOL BrotliEncoderSetParameter( 134 BrotliEncoderState* state, BrotliEncoderParameter p, uint32_t value) { 135 /* Changing parameters on the fly is not implemented yet. */ 136 if (state->is_initialized_) return BROTLI_FALSE; 137 /* TODO: Validate/clamp parameters here. */ 138 switch (p) { 139 case BROTLI_PARAM_MODE: 140 state->params.mode = (BrotliEncoderMode)value; 141 return BROTLI_TRUE; 142 143 case BROTLI_PARAM_QUALITY: 144 state->params.quality = (int)value; 145 return BROTLI_TRUE; 146 147 case BROTLI_PARAM_LGWIN: 148 state->params.lgwin = (int)value; 149 return BROTLI_TRUE; 150 151 case BROTLI_PARAM_LGBLOCK: 152 state->params.lgblock = (int)value; 153 return BROTLI_TRUE; 154 155 case BROTLI_PARAM_DISABLE_LITERAL_CONTEXT_MODELING: 156 if ((value != 0) && (value != 1)) return BROTLI_FALSE; 157 state->params.disable_literal_context_modeling = TO_BROTLI_BOOL(!!value); 158 return BROTLI_TRUE; 159 160 case BROTLI_PARAM_SIZE_HINT: 161 state->params.size_hint = value; 162 return BROTLI_TRUE; 163 164 default: return BROTLI_FALSE; 165 } 166} 167 168static void RecomputeDistancePrefixes(Command* cmds, 169 size_t num_commands, 170 uint32_t num_direct_distance_codes, 171 uint32_t distance_postfix_bits) { 172 size_t i; 173 if (num_direct_distance_codes == 0 && distance_postfix_bits == 0) { 174 return; 175 } 176 for (i = 0; i < num_commands; ++i) { 177 Command* cmd = &cmds[i]; 178 if (CommandCopyLen(cmd) && cmd->cmd_prefix_ >= 128) { 179 PrefixEncodeCopyDistance(CommandRestoreDistanceCode(cmd), 180 num_direct_distance_codes, 181 distance_postfix_bits, 182 &cmd->dist_prefix_, 183 &cmd->dist_extra_); 184 } 185 } 186} 187 188/* Wraps 64-bit input position to 32-bit ring-buffer position preserving 189 "not-a-first-lap" feature. */ 190static uint32_t WrapPosition(uint64_t position) { 191 uint32_t result = (uint32_t)position; 192 uint64_t gb = position >> 30; 193 if (gb > 2) { 194 /* Wrap every 2GiB; The first 3GB are continuous. */ 195 result = (result & ((1u << 30) - 1)) | ((uint32_t)((gb - 1) & 1) + 1) << 30; 196 } 197 return result; 198} 199 200static uint8_t* GetBrotliStorage(BrotliEncoderState* s, size_t size) { 201 MemoryManager* m = &s->memory_manager_; 202 if (s->storage_size_ < size) { 203 BROTLI_FREE(m, s->storage_); 204 s->storage_ = BROTLI_ALLOC(m, uint8_t, size); 205 if (BROTLI_IS_OOM(m)) return NULL; 206 s->storage_size_ = size; 207 } 208 return s->storage_; 209} 210 211static size_t HashTableSize(size_t max_table_size, size_t input_size) { 212 size_t htsize = 256; 213 while (htsize < max_table_size && htsize < input_size) { 214 htsize <<= 1; 215 } 216 return htsize; 217} 218 219static int* GetHashTable(BrotliEncoderState* s, int quality, 220 size_t input_size, size_t* table_size) { 221 /* Use smaller hash table when input.size() is smaller, since we 222 fill the table, incurring O(hash table size) overhead for 223 compression, and if the input is short, we won't need that 224 many hash table entries anyway. */ 225 MemoryManager* m = &s->memory_manager_; 226 const size_t max_table_size = MaxHashTableSize(quality); 227 size_t htsize = HashTableSize(max_table_size, input_size); 228 int* table; 229 assert(max_table_size >= 256); 230 if (quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { 231 /* Only odd shifts are supported by fast-one-pass. */ 232 if ((htsize & 0xAAAAA) == 0) { 233 htsize <<= 1; 234 } 235 } 236 237 if (htsize <= sizeof(s->small_table_) / sizeof(s->small_table_[0])) { 238 table = s->small_table_; 239 } else { 240 if (htsize > s->large_table_size_) { 241 s->large_table_size_ = htsize; 242 BROTLI_FREE(m, s->large_table_); 243 s->large_table_ = BROTLI_ALLOC(m, int, htsize); 244 if (BROTLI_IS_OOM(m)) return 0; 245 } 246 table = s->large_table_; 247 } 248 249 *table_size = htsize; 250 memset(table, 0, htsize * sizeof(*table)); 251 return table; 252} 253 254static void EncodeWindowBits(int lgwin, uint8_t* last_byte, 255 uint8_t* last_byte_bits) { 256 if (lgwin == 16) { 257 *last_byte = 0; 258 *last_byte_bits = 1; 259 } else if (lgwin == 17) { 260 *last_byte = 1; 261 *last_byte_bits = 7; 262 } else if (lgwin > 17) { 263 *last_byte = (uint8_t)(((lgwin - 17) << 1) | 1); 264 *last_byte_bits = 4; 265 } else { 266 *last_byte = (uint8_t)(((lgwin - 8) << 4) | 1); 267 *last_byte_bits = 7; 268 } 269} 270 271/* Initializes the command and distance prefix codes for the first block. */ 272static void InitCommandPrefixCodes(uint8_t cmd_depths[128], 273 uint16_t cmd_bits[128], 274 uint8_t cmd_code[512], 275 size_t* cmd_code_numbits) { 276 static const uint8_t kDefaultCommandDepths[128] = { 277 0, 4, 4, 5, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 278 0, 0, 0, 4, 4, 4, 4, 4, 5, 5, 6, 6, 6, 6, 7, 7, 279 7, 7, 10, 10, 10, 10, 10, 10, 0, 4, 4, 5, 5, 5, 6, 6, 280 7, 8, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 281 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 6, 6, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 283 4, 4, 4, 5, 5, 5, 5, 5, 5, 6, 6, 7, 7, 7, 8, 10, 284 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 285 }; 286 static const uint16_t kDefaultCommandBits[128] = { 287 0, 0, 8, 9, 3, 35, 7, 71, 288 39, 103, 23, 47, 175, 111, 239, 31, 289 0, 0, 0, 4, 12, 2, 10, 6, 290 13, 29, 11, 43, 27, 59, 87, 55, 291 15, 79, 319, 831, 191, 703, 447, 959, 292 0, 14, 1, 25, 5, 21, 19, 51, 293 119, 159, 95, 223, 479, 991, 63, 575, 294 127, 639, 383, 895, 255, 767, 511, 1023, 295 14, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 296 27, 59, 7, 39, 23, 55, 30, 1, 17, 9, 25, 5, 0, 8, 4, 12, 297 2, 10, 6, 21, 13, 29, 3, 19, 11, 15, 47, 31, 95, 63, 127, 255, 298 767, 2815, 1791, 3839, 511, 2559, 1535, 3583, 1023, 3071, 2047, 4095, 299 }; 300 static const uint8_t kDefaultCommandCode[] = { 301 0xff, 0x77, 0xd5, 0xbf, 0xe7, 0xde, 0xea, 0x9e, 0x51, 0x5d, 0xde, 0xc6, 302 0x70, 0x57, 0xbc, 0x58, 0x58, 0x58, 0xd8, 0xd8, 0x58, 0xd5, 0xcb, 0x8c, 303 0xea, 0xe0, 0xc3, 0x87, 0x1f, 0x83, 0xc1, 0x60, 0x1c, 0x67, 0xb2, 0xaa, 304 0x06, 0x83, 0xc1, 0x60, 0x30, 0x18, 0xcc, 0xa1, 0xce, 0x88, 0x54, 0x94, 305 0x46, 0xe1, 0xb0, 0xd0, 0x4e, 0xb2, 0xf7, 0x04, 0x00, 306 }; 307 static const size_t kDefaultCommandCodeNumBits = 448; 308 COPY_ARRAY(cmd_depths, kDefaultCommandDepths); 309 COPY_ARRAY(cmd_bits, kDefaultCommandBits); 310 311 /* Initialize the pre-compressed form of the command and distance prefix 312 codes. */ 313 COPY_ARRAY(cmd_code, kDefaultCommandCode); 314 *cmd_code_numbits = kDefaultCommandCodeNumBits; 315} 316 317/* Decide about the context map based on the ability of the prediction 318 ability of the previous byte UTF8-prefix on the next byte. The 319 prediction ability is calculated as Shannon entropy. Here we need 320 Shannon entropy instead of 'BitsEntropy' since the prefix will be 321 encoded with the remaining 6 bits of the following byte, and 322 BitsEntropy will assume that symbol to be stored alone using Huffman 323 coding. */ 324static void ChooseContextMap(int quality, 325 uint32_t* bigram_histo, 326 size_t* num_literal_contexts, 327 const uint32_t** literal_context_map) { 328 static const uint32_t kStaticContextMapContinuation[64] = { 329 1, 1, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 330 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 331 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 332 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 333 }; 334 static const uint32_t kStaticContextMapSimpleUTF8[64] = { 335 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 336 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 337 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 338 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 339 }; 340 341 uint32_t monogram_histo[3] = { 0 }; 342 uint32_t two_prefix_histo[6] = { 0 }; 343 size_t total; 344 size_t i; 345 size_t dummy; 346 double entropy[4]; 347 for (i = 0; i < 9; ++i) { 348 monogram_histo[i % 3] += bigram_histo[i]; 349 two_prefix_histo[i % 6] += bigram_histo[i]; 350 } 351 entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy); 352 entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) + 353 ShannonEntropy(two_prefix_histo + 3, 3, &dummy)); 354 entropy[3] = 0; 355 for (i = 0; i < 3; ++i) { 356 entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy); 357 } 358 359 total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2]; 360 assert(total != 0); 361 entropy[0] = 1.0 / (double)total; 362 entropy[1] *= entropy[0]; 363 entropy[2] *= entropy[0]; 364 entropy[3] *= entropy[0]; 365 366 if (quality < MIN_QUALITY_FOR_HQ_CONTEXT_MODELING) { 367 /* 3 context models is a bit slower, don't use it at lower qualities. */ 368 entropy[3] = entropy[1] * 10; 369 } 370 /* If expected savings by symbol are less than 0.2 bits, skip the 371 context modeling -- in exchange for faster decoding speed. */ 372 if (entropy[1] - entropy[2] < 0.2 && 373 entropy[1] - entropy[3] < 0.2) { 374 *num_literal_contexts = 1; 375 } else if (entropy[2] - entropy[3] < 0.02) { 376 *num_literal_contexts = 2; 377 *literal_context_map = kStaticContextMapSimpleUTF8; 378 } else { 379 *num_literal_contexts = 3; 380 *literal_context_map = kStaticContextMapContinuation; 381 } 382} 383 384/* Decide if we want to use a more complex static context map containing 13 385 context values, based on the entropy reduction of histograms over the 386 first 5 bits of literals. */ 387static BROTLI_BOOL ShouldUseComplexStaticContextMap(const uint8_t* input, 388 size_t start_pos, size_t length, size_t mask, int quality, 389 size_t size_hint, ContextType* literal_context_mode, 390 size_t* num_literal_contexts, const uint32_t** literal_context_map) { 391 static const uint32_t kStaticContextMapComplexUTF8[64] = { 392 11, 11, 12, 12, /* 0 special */ 393 0, 0, 0, 0, /* 4 lf */ 394 1, 1, 9, 9, /* 8 space */ 395 2, 2, 2, 2, /* !, first after space/lf and after something else. */ 396 1, 1, 1, 1, /* " */ 397 8, 3, 3, 3, /* % */ 398 1, 1, 1, 1, /* ({[ */ 399 2, 2, 2, 2, /* }]) */ 400 8, 4, 4, 4, /* :; */ 401 8, 7, 4, 4, /* . */ 402 8, 0, 0, 0, /* > */ 403 3, 3, 3, 3, /* [0..9] */ 404 5, 5, 10, 5, /* [A-Z] */ 405 5, 5, 10, 5, 406 6, 6, 6, 6, /* [a-z] */ 407 6, 6, 6, 6, 408 }; 409 BROTLI_UNUSED(quality); 410 /* Try the more complex static context map only for long data. */ 411 if (size_hint < (1 << 20)) { 412 return BROTLI_FALSE; 413 } else { 414 const size_t end_pos = start_pos + length; 415 /* To make entropy calculations faster and to fit on the stack, we collect 416 histograms over the 5 most significant bits of literals. One histogram 417 without context and 13 additional histograms for each context value. */ 418 uint32_t combined_histo[32] = { 0 }; 419 uint32_t context_histo[13][32] = { { 0 } }; 420 uint32_t total = 0; 421 double entropy[3]; 422 size_t dummy; 423 size_t i; 424 for (; start_pos + 64 <= end_pos; start_pos += 4096) { 425 const size_t stride_end_pos = start_pos + 64; 426 uint8_t prev2 = input[start_pos & mask]; 427 uint8_t prev1 = input[(start_pos + 1) & mask]; 428 size_t pos; 429 /* To make the analysis of the data faster we only examine 64 byte long 430 strides at every 4kB intervals. */ 431 for (pos = start_pos + 2; pos < stride_end_pos; ++pos) { 432 const uint8_t literal = input[pos & mask]; 433 const uint8_t context = (uint8_t)kStaticContextMapComplexUTF8[ 434 Context(prev1, prev2, CONTEXT_UTF8)]; 435 ++total; 436 ++combined_histo[literal >> 3]; 437 ++context_histo[context][literal >> 3]; 438 prev2 = prev1; 439 prev1 = literal; 440 } 441 } 442 entropy[1] = ShannonEntropy(combined_histo, 32, &dummy); 443 entropy[2] = 0; 444 for (i = 0; i < 13; ++i) { 445 entropy[2] += ShannonEntropy(&context_histo[i][0], 32, &dummy); 446 } 447 entropy[0] = 1.0 / (double)total; 448 entropy[1] *= entropy[0]; 449 entropy[2] *= entropy[0]; 450 /* The triggering heuristics below were tuned by compressing the individual 451 files of the silesia corpus. If we skip this kind of context modeling 452 for not very well compressible input (i.e. entropy using context modeling 453 is 60% of maximal entropy) or if expected savings by symbol are less 454 than 0.2 bits, then in every case when it triggers, the final compression 455 ratio is improved. Note however that this heuristics might be too strict 456 for some cases and could be tuned further. */ 457 if (entropy[2] > 3.0 || entropy[1] - entropy[2] < 0.2) { 458 return BROTLI_FALSE; 459 } else { 460 *literal_context_mode = CONTEXT_UTF8; 461 *num_literal_contexts = 13; 462 *literal_context_map = kStaticContextMapComplexUTF8; 463 return BROTLI_TRUE; 464 } 465 } 466} 467 468static void DecideOverLiteralContextModeling(const uint8_t* input, 469 size_t start_pos, size_t length, size_t mask, int quality, 470 size_t size_hint, ContextType* literal_context_mode, 471 size_t* num_literal_contexts, const uint32_t** literal_context_map) { 472 if (quality < MIN_QUALITY_FOR_CONTEXT_MODELING || length < 64) { 473 return; 474 } else if (ShouldUseComplexStaticContextMap( 475 input, start_pos, length, mask, quality, size_hint, literal_context_mode, 476 num_literal_contexts, literal_context_map)) { 477 /* Context map was already set, nothing else to do. */ 478 } else { 479 /* Gather bi-gram data of the UTF8 byte prefixes. To make the analysis of 480 UTF8 data faster we only examine 64 byte long strides at every 4kB 481 intervals. */ 482 const size_t end_pos = start_pos + length; 483 uint32_t bigram_prefix_histo[9] = { 0 }; 484 for (; start_pos + 64 <= end_pos; start_pos += 4096) { 485 static const int lut[4] = { 0, 0, 1, 2 }; 486 const size_t stride_end_pos = start_pos + 64; 487 int prev = lut[input[start_pos & mask] >> 6] * 3; 488 size_t pos; 489 for (pos = start_pos + 1; pos < stride_end_pos; ++pos) { 490 const uint8_t literal = input[pos & mask]; 491 ++bigram_prefix_histo[prev + lut[literal >> 6]]; 492 prev = lut[literal >> 6] * 3; 493 } 494 } 495 *literal_context_mode = CONTEXT_UTF8; 496 ChooseContextMap(quality, &bigram_prefix_histo[0], num_literal_contexts, 497 literal_context_map); 498 } 499} 500 501static BROTLI_BOOL ShouldCompress( 502 const uint8_t* data, const size_t mask, const uint64_t last_flush_pos, 503 const size_t bytes, const size_t num_literals, const size_t num_commands) { 504 if (num_commands < (bytes >> 8) + 2) { 505 if (num_literals > 0.99 * (double)bytes) { 506 uint32_t literal_histo[256] = { 0 }; 507 static const uint32_t kSampleRate = 13; 508 static const double kMinEntropy = 7.92; 509 const double bit_cost_threshold = 510 (double)bytes * kMinEntropy / kSampleRate; 511 size_t t = (bytes + kSampleRate - 1) / kSampleRate; 512 uint32_t pos = (uint32_t)last_flush_pos; 513 size_t i; 514 for (i = 0; i < t; i++) { 515 ++literal_histo[data[pos & mask]]; 516 pos += kSampleRate; 517 } 518 if (BitsEntropy(literal_histo, 256) > bit_cost_threshold) { 519 return BROTLI_FALSE; 520 } 521 } 522 } 523 return BROTLI_TRUE; 524} 525 526static void WriteMetaBlockInternal(MemoryManager* m, 527 const uint8_t* data, 528 const size_t mask, 529 const uint64_t last_flush_pos, 530 const size_t bytes, 531 const BROTLI_BOOL is_last, 532 const BrotliEncoderParams* params, 533 const uint8_t prev_byte, 534 const uint8_t prev_byte2, 535 const size_t num_literals, 536 const size_t num_commands, 537 Command* commands, 538 const int* saved_dist_cache, 539 int* dist_cache, 540 size_t* storage_ix, 541 uint8_t* storage) { 542 const uint32_t wrapped_last_flush_pos = WrapPosition(last_flush_pos); 543 uint8_t last_byte; 544 uint8_t last_byte_bits; 545 uint32_t num_direct_distance_codes = 0; 546 uint32_t distance_postfix_bits = 0; 547 548 if (bytes == 0) { 549 /* Write the ISLAST and ISEMPTY bits. */ 550 BrotliWriteBits(2, 3, storage_ix, storage); 551 *storage_ix = (*storage_ix + 7u) & ~7u; 552 return; 553 } 554 555 if (!ShouldCompress(data, mask, last_flush_pos, bytes, 556 num_literals, num_commands)) { 557 /* Restore the distance cache, as its last update by 558 CreateBackwardReferences is now unused. */ 559 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); 560 BrotliStoreUncompressedMetaBlock(is_last, data, 561 wrapped_last_flush_pos, mask, bytes, 562 storage_ix, storage); 563 return; 564 } 565 566 last_byte = storage[0]; 567 last_byte_bits = (uint8_t)(*storage_ix & 0xff); 568 if (params->quality >= MIN_QUALITY_FOR_RECOMPUTE_DISTANCE_PREFIXES && 569 params->mode == BROTLI_MODE_FONT) { 570 num_direct_distance_codes = 12; 571 distance_postfix_bits = 1; 572 RecomputeDistancePrefixes(commands, 573 num_commands, 574 num_direct_distance_codes, 575 distance_postfix_bits); 576 } 577 if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { 578 BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, 579 bytes, mask, is_last, 580 commands, num_commands, 581 storage_ix, storage); 582 if (BROTLI_IS_OOM(m)) return; 583 } else if (params->quality < MIN_QUALITY_FOR_BLOCK_SPLIT) { 584 BrotliStoreMetaBlockTrivial(m, data, wrapped_last_flush_pos, 585 bytes, mask, is_last, 586 commands, num_commands, 587 storage_ix, storage); 588 if (BROTLI_IS_OOM(m)) return; 589 } else { 590 ContextType literal_context_mode = CONTEXT_UTF8; 591 MetaBlockSplit mb; 592 InitMetaBlockSplit(&mb); 593 if (params->quality < MIN_QUALITY_FOR_HQ_BLOCK_SPLITTING) { 594 size_t num_literal_contexts = 1; 595 const uint32_t* literal_context_map = NULL; 596 if (!params->disable_literal_context_modeling) { 597 DecideOverLiteralContextModeling( 598 data, wrapped_last_flush_pos, bytes, mask, params->quality, 599 params->size_hint, &literal_context_mode, &num_literal_contexts, 600 &literal_context_map); 601 } 602 BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, 603 prev_byte, prev_byte2, literal_context_mode, num_literal_contexts, 604 literal_context_map, commands, num_commands, &mb); 605 if (BROTLI_IS_OOM(m)) return; 606 } else { 607 if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes, 608 kMinUTF8Ratio)) { 609 literal_context_mode = CONTEXT_SIGNED; 610 } 611 BrotliBuildMetaBlock(m, data, wrapped_last_flush_pos, mask, params, 612 prev_byte, prev_byte2, 613 commands, num_commands, 614 literal_context_mode, 615 &mb); 616 if (BROTLI_IS_OOM(m)) return; 617 } 618 if (params->quality >= MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS) { 619 BrotliOptimizeHistograms(num_direct_distance_codes, 620 distance_postfix_bits, 621 &mb); 622 } 623 BrotliStoreMetaBlock(m, data, wrapped_last_flush_pos, bytes, mask, 624 prev_byte, prev_byte2, 625 is_last, 626 num_direct_distance_codes, 627 distance_postfix_bits, 628 literal_context_mode, 629 commands, num_commands, 630 &mb, 631 storage_ix, storage); 632 if (BROTLI_IS_OOM(m)) return; 633 DestroyMetaBlockSplit(m, &mb); 634 } 635 if (bytes + 4 < (*storage_ix >> 3)) { 636 /* Restore the distance cache and last byte. */ 637 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); 638 storage[0] = last_byte; 639 *storage_ix = last_byte_bits; 640 BrotliStoreUncompressedMetaBlock(is_last, data, 641 wrapped_last_flush_pos, mask, 642 bytes, storage_ix, storage); 643 } 644} 645 646static BROTLI_BOOL EnsureInitialized(BrotliEncoderState* s) { 647 if (BROTLI_IS_OOM(&s->memory_manager_)) return BROTLI_FALSE; 648 if (s->is_initialized_) return BROTLI_TRUE; 649 650 SanitizeParams(&s->params); 651 s->params.lgblock = ComputeLgBlock(&s->params); 652 653 s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX; 654 655 RingBufferSetup(&s->params, &s->ringbuffer_); 656 657 /* Initialize last byte with stream header. */ 658 { 659 int lgwin = s->params.lgwin; 660 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || 661 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { 662 lgwin = BROTLI_MAX(int, lgwin, 18); 663 } 664 EncodeWindowBits(lgwin, &s->last_byte_, &s->last_byte_bits_); 665 } 666 667 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { 668 InitCommandPrefixCodes(s->cmd_depths_, s->cmd_bits_, 669 s->cmd_code_, &s->cmd_code_numbits_); 670 } 671 672 s->is_initialized_ = BROTLI_TRUE; 673 return BROTLI_TRUE; 674} 675 676static void BrotliEncoderInitParams(BrotliEncoderParams* params) { 677 params->mode = BROTLI_DEFAULT_MODE; 678 params->quality = BROTLI_DEFAULT_QUALITY; 679 params->lgwin = BROTLI_DEFAULT_WINDOW; 680 params->lgblock = 0; 681 params->size_hint = 0; 682 params->disable_literal_context_modeling = BROTLI_FALSE; 683} 684 685static void BrotliEncoderInitState(BrotliEncoderState* s) { 686 BrotliEncoderInitParams(&s->params); 687 s->input_pos_ = 0; 688 s->num_commands_ = 0; 689 s->num_literals_ = 0; 690 s->last_insert_len_ = 0; 691 s->last_flush_pos_ = 0; 692 s->last_processed_pos_ = 0; 693 s->prev_byte_ = 0; 694 s->prev_byte2_ = 0; 695 s->storage_size_ = 0; 696 s->storage_ = 0; 697 s->hasher_ = NULL; 698 s->large_table_ = NULL; 699 s->large_table_size_ = 0; 700 s->cmd_code_numbits_ = 0; 701 s->command_buf_ = NULL; 702 s->literal_buf_ = NULL; 703 s->next_out_ = NULL; 704 s->available_out_ = 0; 705 s->total_out_ = 0; 706 s->stream_state_ = BROTLI_STREAM_PROCESSING; 707 s->is_last_block_emitted_ = BROTLI_FALSE; 708 s->is_initialized_ = BROTLI_FALSE; 709 710 RingBufferInit(&s->ringbuffer_); 711 712 s->commands_ = 0; 713 s->cmd_alloc_size_ = 0; 714 715 /* Initialize distance cache. */ 716 s->dist_cache_[0] = 4; 717 s->dist_cache_[1] = 11; 718 s->dist_cache_[2] = 15; 719 s->dist_cache_[3] = 16; 720 /* Save the state of the distance cache in case we need to restore it for 721 emitting an uncompressed block. */ 722 memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_)); 723} 724 725BrotliEncoderState* BrotliEncoderCreateInstance(brotli_alloc_func alloc_func, 726 brotli_free_func free_func, 727 void* opaque) { 728 BrotliEncoderState* state = 0; 729 if (!alloc_func && !free_func) { 730 state = (BrotliEncoderState*)malloc(sizeof(BrotliEncoderState)); 731 } else if (alloc_func && free_func) { 732 state = (BrotliEncoderState*)alloc_func(opaque, sizeof(BrotliEncoderState)); 733 } 734 if (state == 0) { 735 /* BROTLI_DUMP(); */ 736 return 0; 737 } 738 BrotliInitMemoryManager( 739 &state->memory_manager_, alloc_func, free_func, opaque); 740 BrotliEncoderInitState(state); 741 return state; 742} 743 744static void BrotliEncoderCleanupState(BrotliEncoderState* s) { 745 MemoryManager* m = &s->memory_manager_; 746 if (BROTLI_IS_OOM(m)) { 747 BrotliWipeOutMemoryManager(m); 748 return; 749 } 750 BROTLI_FREE(m, s->storage_); 751 BROTLI_FREE(m, s->commands_); 752 RingBufferFree(m, &s->ringbuffer_); 753 DestroyHasher(m, &s->hasher_); 754 BROTLI_FREE(m, s->large_table_); 755 BROTLI_FREE(m, s->command_buf_); 756 BROTLI_FREE(m, s->literal_buf_); 757} 758 759/* Deinitializes and frees BrotliEncoderState instance. */ 760void BrotliEncoderDestroyInstance(BrotliEncoderState* state) { 761 if (!state) { 762 return; 763 } else { 764 MemoryManager* m = &state->memory_manager_; 765 brotli_free_func free_func = m->free_func; 766 void* opaque = m->opaque; 767 BrotliEncoderCleanupState(state); 768 free_func(opaque, state); 769 } 770} 771 772/* 773 Copies the given input data to the internal ring buffer of the compressor. 774 No processing of the data occurs at this time and this function can be 775 called multiple times before calling WriteBrotliData() to process the 776 accumulated input. At most input_block_size() bytes of input data can be 777 copied to the ring buffer, otherwise the next WriteBrotliData() will fail. 778 */ 779static void CopyInputToRingBuffer(BrotliEncoderState* s, 780 const size_t input_size, 781 const uint8_t* input_buffer) { 782 RingBuffer* ringbuffer_ = &s->ringbuffer_; 783 MemoryManager* m = &s->memory_manager_; 784 if (!EnsureInitialized(s)) return; 785 RingBufferWrite(m, input_buffer, input_size, ringbuffer_); 786 if (BROTLI_IS_OOM(m)) return; 787 s->input_pos_ += input_size; 788 789 /* TL;DR: If needed, initialize 7 more bytes in the ring buffer to make the 790 hashing not depend on uninitialized data. This makes compression 791 deterministic and it prevents uninitialized memory warnings in Valgrind. 792 Even without erasing, the output would be valid (but nondeterministic). 793 794 Background information: The compressor stores short (at most 8 bytes) 795 substrings of the input already read in a hash table, and detects 796 repetitions by looking up such substrings in the hash table. If it 797 can find a substring, it checks whether the substring is really there 798 in the ring buffer (or it's just a hash collision). Should the hash 799 table become corrupt, this check makes sure that the output is 800 still valid, albeit the compression ratio would be bad. 801 802 The compressor populates the hash table from the ring buffer as it's 803 reading new bytes from the input. However, at the last few indexes of 804 the ring buffer, there are not enough bytes to build full-length 805 substrings from. Since the hash table always contains full-length 806 substrings, we erase with dummy zeros here to make sure that those 807 substrings will contain zeros at the end instead of uninitialized 808 data. 809 810 Please note that erasing is not necessary (because the 811 memory region is already initialized since he ring buffer 812 has a `tail' that holds a copy of the beginning,) so we 813 skip erasing if we have already gone around at least once in 814 the ring buffer. 815 816 Only clear during the first round of ring-buffer writes. On 817 subsequent rounds data in the ring-buffer would be affected. */ 818 if (ringbuffer_->pos_ <= ringbuffer_->mask_) { 819 /* This is the first time when the ring buffer is being written. 820 We clear 7 bytes just after the bytes that have been copied from 821 the input buffer. 822 823 The ring-buffer has a "tail" that holds a copy of the beginning, 824 but only once the ring buffer has been fully written once, i.e., 825 pos <= mask. For the first time, we need to write values 826 in this tail (where index may be larger than mask), so that 827 we have exactly defined behavior and don't read uninitialized 828 memory. Due to performance reasons, hashing reads data using a 829 LOAD64, which can go 7 bytes beyond the bytes written in the 830 ring-buffer. */ 831 memset(ringbuffer_->buffer_ + ringbuffer_->pos_, 0, 7); 832 } 833} 834 835void BrotliEncoderSetCustomDictionary(BrotliEncoderState* s, size_t size, 836 const uint8_t* dict) { 837 size_t max_dict_size = BROTLI_MAX_BACKWARD_LIMIT(s->params.lgwin); 838 size_t dict_size = size; 839 MemoryManager* m = &s->memory_manager_; 840 841 if (!EnsureInitialized(s)) return; 842 843 if (dict_size == 0 || 844 s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || 845 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { 846 return; 847 } 848 if (size > max_dict_size) { 849 dict += size - max_dict_size; 850 dict_size = max_dict_size; 851 } 852 CopyInputToRingBuffer(s, dict_size, dict); 853 s->last_flush_pos_ = dict_size; 854 s->last_processed_pos_ = dict_size; 855 if (dict_size > 0) { 856 s->prev_byte_ = dict[dict_size - 1]; 857 } 858 if (dict_size > 1) { 859 s->prev_byte2_ = dict[dict_size - 2]; 860 } 861 HasherPrependCustomDictionary(m, &s->hasher_, &s->params, dict_size, dict); 862 if (BROTLI_IS_OOM(m)) return; 863} 864 865/* Marks all input as processed. 866 Returns true if position wrapping occurs. */ 867static BROTLI_BOOL UpdateLastProcessedPos(BrotliEncoderState* s) { 868 uint32_t wrapped_last_processed_pos = WrapPosition(s->last_processed_pos_); 869 uint32_t wrapped_input_pos = WrapPosition(s->input_pos_); 870 s->last_processed_pos_ = s->input_pos_; 871 return TO_BROTLI_BOOL(wrapped_input_pos < wrapped_last_processed_pos); 872} 873 874/* 875 Processes the accumulated input data and sets |*out_size| to the length of 876 the new output meta-block, or to zero if no new output meta-block has been 877 created (in this case the processed input data is buffered internally). 878 If |*out_size| is positive, |*output| points to the start of the output 879 data. If |is_last| or |force_flush| is BROTLI_TRUE, an output meta-block is 880 always created. However, until |is_last| is BROTLI_TRUE encoder may retain up 881 to 7 bits of the last byte of output. To force encoder to dump the remaining 882 bits use WriteMetadata() to append an empty meta-data block. 883 Returns BROTLI_FALSE if the size of the input data is larger than 884 input_block_size(). 885 */ 886static BROTLI_BOOL EncodeData( 887 BrotliEncoderState* s, const BROTLI_BOOL is_last, 888 const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) { 889 const uint64_t delta = UnprocessedInputSize(s); 890 const uint32_t bytes = (uint32_t)delta; 891 const uint32_t wrapped_last_processed_pos = 892 WrapPosition(s->last_processed_pos_); 893 uint8_t* data; 894 uint32_t mask; 895 MemoryManager* m = &s->memory_manager_; 896 const BrotliDictionary* dictionary = BrotliGetDictionary(); 897 898 if (!EnsureInitialized(s)) return BROTLI_FALSE; 899 data = s->ringbuffer_.buffer_; 900 mask = s->ringbuffer_.mask_; 901 902 /* Adding more blocks after "last" block is forbidden. */ 903 if (s->is_last_block_emitted_) return BROTLI_FALSE; 904 if (is_last) s->is_last_block_emitted_ = BROTLI_TRUE; 905 906 if (delta > InputBlockSize(s)) { 907 return BROTLI_FALSE; 908 } 909 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY && 910 !s->command_buf_) { 911 s->command_buf_ = 912 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize); 913 s->literal_buf_ = 914 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize); 915 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 916 } 917 918 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || 919 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { 920 uint8_t* storage; 921 size_t storage_ix = s->last_byte_bits_; 922 size_t table_size; 923 int* table; 924 925 if (delta == 0 && !is_last) { 926 /* We have no new input data and we don't have to finish the stream, so 927 nothing to do. */ 928 *out_size = 0; 929 return BROTLI_TRUE; 930 } 931 storage = GetBrotliStorage(s, 2 * bytes + 502); 932 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 933 storage[0] = s->last_byte_; 934 table = GetHashTable(s, s->params.quality, bytes, &table_size); 935 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 936 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { 937 BrotliCompressFragmentFast( 938 m, &data[wrapped_last_processed_pos & mask], 939 bytes, is_last, 940 table, table_size, 941 s->cmd_depths_, s->cmd_bits_, 942 &s->cmd_code_numbits_, s->cmd_code_, 943 &storage_ix, storage); 944 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 945 } else { 946 BrotliCompressFragmentTwoPass( 947 m, &data[wrapped_last_processed_pos & mask], 948 bytes, is_last, 949 s->command_buf_, s->literal_buf_, 950 table, table_size, 951 &storage_ix, storage); 952 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 953 } 954 s->last_byte_ = storage[storage_ix >> 3]; 955 s->last_byte_bits_ = storage_ix & 7u; 956 UpdateLastProcessedPos(s); 957 *output = &storage[0]; 958 *out_size = storage_ix >> 3; 959 return BROTLI_TRUE; 960 } 961 962 { 963 /* Theoretical max number of commands is 1 per 2 bytes. */ 964 size_t newsize = s->num_commands_ + bytes / 2 + 1; 965 if (newsize > s->cmd_alloc_size_) { 966 Command* new_commands; 967 /* Reserve a bit more memory to allow merging with a next block 968 without reallocation: that would impact speed. */ 969 newsize += (bytes / 4) + 16; 970 s->cmd_alloc_size_ = newsize; 971 new_commands = BROTLI_ALLOC(m, Command, newsize); 972 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 973 if (s->commands_) { 974 memcpy(new_commands, s->commands_, sizeof(Command) * s->num_commands_); 975 BROTLI_FREE(m, s->commands_); 976 } 977 s->commands_ = new_commands; 978 } 979 } 980 981 InitOrStitchToPreviousBlock(m, &s->hasher_, data, mask, &s->params, 982 wrapped_last_processed_pos, bytes, is_last); 983 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 984 985 if (s->params.quality == ZOPFLIFICATION_QUALITY) { 986 assert(s->params.hasher.type == 10); 987 BrotliCreateZopfliBackwardReferences( 988 m, dictionary, bytes, wrapped_last_processed_pos, data, mask, 989 &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, 990 &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); 991 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 992 } else if (s->params.quality == HQ_ZOPFLIFICATION_QUALITY) { 993 assert(s->params.hasher.type == 10); 994 BrotliCreateHqZopfliBackwardReferences( 995 m, dictionary, bytes, wrapped_last_processed_pos, data, mask, 996 &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, 997 &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); 998 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 999 } else { 1000 BrotliCreateBackwardReferences( 1001 dictionary, bytes, wrapped_last_processed_pos, data, mask, 1002 &s->params, s->hasher_, s->dist_cache_, &s->last_insert_len_, 1003 &s->commands_[s->num_commands_], &s->num_commands_, &s->num_literals_); 1004 } 1005 1006 { 1007 const size_t max_length = MaxMetablockSize(&s->params); 1008 const size_t max_literals = max_length / 8; 1009 const size_t max_commands = max_length / 8; 1010 const size_t processed_bytes = (size_t)(s->input_pos_ - s->last_flush_pos_); 1011 /* If maximal possible additional block doesn't fit metablock, flush now. */ 1012 /* TODO: Postpone decision until next block arrives? */ 1013 const BROTLI_BOOL next_input_fits_metablock = TO_BROTLI_BOOL( 1014 processed_bytes + InputBlockSize(s) <= max_length); 1015 /* If block splitting is not used, then flush as soon as there is some 1016 amount of commands / literals produced. */ 1017 const BROTLI_BOOL should_flush = TO_BROTLI_BOOL( 1018 s->params.quality < MIN_QUALITY_FOR_BLOCK_SPLIT && 1019 s->num_literals_ + s->num_commands_ >= MAX_NUM_DELAYED_SYMBOLS); 1020 if (!is_last && !force_flush && !should_flush && 1021 next_input_fits_metablock && 1022 s->num_literals_ < max_literals && 1023 s->num_commands_ < max_commands) { 1024 /* Merge with next input block. Everything will happen later. */ 1025 if (UpdateLastProcessedPos(s)) { 1026 HasherReset(s->hasher_); 1027 } 1028 *out_size = 0; 1029 return BROTLI_TRUE; 1030 } 1031 } 1032 1033 /* Create the last insert-only command. */ 1034 if (s->last_insert_len_ > 0) { 1035 InitInsertCommand(&s->commands_[s->num_commands_++], s->last_insert_len_); 1036 s->num_literals_ += s->last_insert_len_; 1037 s->last_insert_len_ = 0; 1038 } 1039 1040 if (!is_last && s->input_pos_ == s->last_flush_pos_) { 1041 /* We have no new input data and we don't have to finish the stream, so 1042 nothing to do. */ 1043 *out_size = 0; 1044 return BROTLI_TRUE; 1045 } 1046 assert(s->input_pos_ >= s->last_flush_pos_); 1047 assert(s->input_pos_ > s->last_flush_pos_ || is_last); 1048 assert(s->input_pos_ - s->last_flush_pos_ <= 1u << 24); 1049 { 1050 const uint32_t metablock_size = 1051 (uint32_t)(s->input_pos_ - s->last_flush_pos_); 1052 uint8_t* storage = GetBrotliStorage(s, 2 * metablock_size + 502); 1053 size_t storage_ix = s->last_byte_bits_; 1054 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1055 storage[0] = s->last_byte_; 1056 WriteMetaBlockInternal( 1057 m, data, mask, s->last_flush_pos_, metablock_size, is_last, 1058 &s->params, s->prev_byte_, s->prev_byte2_, 1059 s->num_literals_, s->num_commands_, s->commands_, s->saved_dist_cache_, 1060 s->dist_cache_, &storage_ix, storage); 1061 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1062 s->last_byte_ = storage[storage_ix >> 3]; 1063 s->last_byte_bits_ = storage_ix & 7u; 1064 s->last_flush_pos_ = s->input_pos_; 1065 if (UpdateLastProcessedPos(s)) { 1066 HasherReset(s->hasher_); 1067 } 1068 if (s->last_flush_pos_ > 0) { 1069 s->prev_byte_ = data[((uint32_t)s->last_flush_pos_ - 1) & mask]; 1070 } 1071 if (s->last_flush_pos_ > 1) { 1072 s->prev_byte2_ = data[(uint32_t)(s->last_flush_pos_ - 2) & mask]; 1073 } 1074 s->num_commands_ = 0; 1075 s->num_literals_ = 0; 1076 /* Save the state of the distance cache in case we need to restore it for 1077 emitting an uncompressed block. */ 1078 memcpy(s->saved_dist_cache_, s->dist_cache_, sizeof(s->saved_dist_cache_)); 1079 *output = &storage[0]; 1080 *out_size = storage_ix >> 3; 1081 return BROTLI_TRUE; 1082 } 1083} 1084 1085/* Dumps remaining output bits and metadata header to |header|. 1086 Returns number of produced bytes. 1087 REQUIRED: |header| should be 8-byte aligned and at least 16 bytes long. 1088 REQUIRED: |block_size| <= (1 << 24). */ 1089static size_t WriteMetadataHeader( 1090 BrotliEncoderState* s, const size_t block_size, uint8_t* header) { 1091 size_t storage_ix; 1092 storage_ix = s->last_byte_bits_; 1093 header[0] = s->last_byte_; 1094 s->last_byte_ = 0; 1095 s->last_byte_bits_ = 0; 1096 1097 BrotliWriteBits(1, 0, &storage_ix, header); 1098 BrotliWriteBits(2, 3, &storage_ix, header); 1099 BrotliWriteBits(1, 0, &storage_ix, header); 1100 if (block_size == 0) { 1101 BrotliWriteBits(2, 0, &storage_ix, header); 1102 } else { 1103 uint32_t nbits = (block_size == 1) ? 0 : 1104 (Log2FloorNonZero((uint32_t)block_size - 1) + 1); 1105 uint32_t nbytes = (nbits + 7) / 8; 1106 BrotliWriteBits(2, nbytes, &storage_ix, header); 1107 BrotliWriteBits(8 * nbytes, block_size - 1, &storage_ix, header); 1108 } 1109 return (storage_ix + 7u) >> 3; 1110} 1111 1112static BROTLI_BOOL BrotliCompressBufferQuality10( 1113 int lgwin, size_t input_size, const uint8_t* input_buffer, 1114 size_t* encoded_size, uint8_t* encoded_buffer) { 1115 MemoryManager memory_manager; 1116 MemoryManager* m = &memory_manager; 1117 1118 const size_t mask = BROTLI_SIZE_MAX >> 1; 1119 const size_t max_backward_limit = BROTLI_MAX_BACKWARD_LIMIT(lgwin); 1120 int dist_cache[4] = { 4, 11, 15, 16 }; 1121 int saved_dist_cache[4] = { 4, 11, 15, 16 }; 1122 BROTLI_BOOL ok = BROTLI_TRUE; 1123 const size_t max_out_size = *encoded_size; 1124 size_t total_out_size = 0; 1125 uint8_t last_byte; 1126 uint8_t last_byte_bits; 1127 HasherHandle hasher = NULL; 1128 1129 const size_t hasher_eff_size = 1130 BROTLI_MIN(size_t, input_size, max_backward_limit + BROTLI_WINDOW_GAP); 1131 1132 BrotliEncoderParams params; 1133 const BrotliDictionary* dictionary = BrotliGetDictionary(); 1134 1135 const int lgmetablock = BROTLI_MIN(int, 24, lgwin + 1); 1136 size_t max_block_size; 1137 const size_t max_metablock_size = (size_t)1 << lgmetablock; 1138 const size_t max_literals_per_metablock = max_metablock_size / 8; 1139 const size_t max_commands_per_metablock = max_metablock_size / 8; 1140 size_t metablock_start = 0; 1141 uint8_t prev_byte = 0; 1142 uint8_t prev_byte2 = 0; 1143 1144 BrotliEncoderInitParams(¶ms); 1145 params.quality = 10; 1146 params.lgwin = lgwin; 1147 SanitizeParams(¶ms); 1148 params.lgblock = ComputeLgBlock(¶ms); 1149 max_block_size = (size_t)1 << params.lgblock; 1150 1151 BrotliInitMemoryManager(m, 0, 0, 0); 1152 1153 assert(input_size <= mask + 1); 1154 EncodeWindowBits(lgwin, &last_byte, &last_byte_bits); 1155 InitOrStitchToPreviousBlock(m, &hasher, input_buffer, mask, ¶ms, 1156 0, hasher_eff_size, BROTLI_TRUE); 1157 if (BROTLI_IS_OOM(m)) goto oom; 1158 1159 while (ok && metablock_start < input_size) { 1160 const size_t metablock_end = 1161 BROTLI_MIN(size_t, input_size, metablock_start + max_metablock_size); 1162 const size_t expected_num_commands = 1163 (metablock_end - metablock_start) / 12 + 16; 1164 Command* commands = 0; 1165 size_t num_commands = 0; 1166 size_t last_insert_len = 0; 1167 size_t num_literals = 0; 1168 size_t metablock_size = 0; 1169 size_t cmd_alloc_size = 0; 1170 BROTLI_BOOL is_last; 1171 uint8_t* storage; 1172 size_t storage_ix; 1173 1174 size_t block_start; 1175 for (block_start = metablock_start; block_start < metablock_end; ) { 1176 size_t block_size = 1177 BROTLI_MIN(size_t, metablock_end - block_start, max_block_size); 1178 ZopfliNode* nodes = BROTLI_ALLOC(m, ZopfliNode, block_size + 1); 1179 size_t path_size; 1180 size_t new_cmd_alloc_size; 1181 if (BROTLI_IS_OOM(m)) goto oom; 1182 BrotliInitZopfliNodes(nodes, block_size + 1); 1183 StitchToPreviousBlockH10(hasher, block_size, block_start, 1184 input_buffer, mask); 1185 path_size = BrotliZopfliComputeShortestPath( 1186 m, dictionary, block_size, block_start, input_buffer, mask, ¶ms, 1187 max_backward_limit, dist_cache, hasher, nodes); 1188 if (BROTLI_IS_OOM(m)) goto oom; 1189 /* We allocate a command buffer in the first iteration of this loop that 1190 will be likely big enough for the whole metablock, so that for most 1191 inputs we will not have to reallocate in later iterations. We do the 1192 allocation here and not before the loop, because if the input is small, 1193 this will be allocated after the Zopfli cost model is freed, so this 1194 will not increase peak memory usage. 1195 TODO: If the first allocation is too small, increase command 1196 buffer size exponentially. */ 1197 new_cmd_alloc_size = BROTLI_MAX(size_t, expected_num_commands, 1198 num_commands + path_size + 1); 1199 if (cmd_alloc_size != new_cmd_alloc_size) { 1200 Command* new_commands = BROTLI_ALLOC(m, Command, new_cmd_alloc_size); 1201 if (BROTLI_IS_OOM(m)) goto oom; 1202 cmd_alloc_size = new_cmd_alloc_size; 1203 if (commands) { 1204 memcpy(new_commands, commands, sizeof(Command) * num_commands); 1205 BROTLI_FREE(m, commands); 1206 } 1207 commands = new_commands; 1208 } 1209 BrotliZopfliCreateCommands(block_size, block_start, max_backward_limit, 1210 &nodes[0], dist_cache, &last_insert_len, 1211 &commands[num_commands], &num_literals); 1212 num_commands += path_size; 1213 block_start += block_size; 1214 metablock_size += block_size; 1215 BROTLI_FREE(m, nodes); 1216 if (num_literals > max_literals_per_metablock || 1217 num_commands > max_commands_per_metablock) { 1218 break; 1219 } 1220 } 1221 1222 if (last_insert_len > 0) { 1223 InitInsertCommand(&commands[num_commands++], last_insert_len); 1224 num_literals += last_insert_len; 1225 } 1226 1227 is_last = TO_BROTLI_BOOL(metablock_start + metablock_size == input_size); 1228 storage = NULL; 1229 storage_ix = last_byte_bits; 1230 1231 if (metablock_size == 0) { 1232 /* Write the ISLAST and ISEMPTY bits. */ 1233 storage = BROTLI_ALLOC(m, uint8_t, 16); 1234 if (BROTLI_IS_OOM(m)) goto oom; 1235 storage[0] = last_byte; 1236 BrotliWriteBits(2, 3, &storage_ix, storage); 1237 storage_ix = (storage_ix + 7u) & ~7u; 1238 } else if (!ShouldCompress(input_buffer, mask, metablock_start, 1239 metablock_size, num_literals, num_commands)) { 1240 /* Restore the distance cache, as its last update by 1241 CreateBackwardReferences is now unused. */ 1242 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); 1243 storage = BROTLI_ALLOC(m, uint8_t, metablock_size + 16); 1244 if (BROTLI_IS_OOM(m)) goto oom; 1245 storage[0] = last_byte; 1246 BrotliStoreUncompressedMetaBlock(is_last, input_buffer, 1247 metablock_start, mask, metablock_size, 1248 &storage_ix, storage); 1249 } else { 1250 uint32_t num_direct_distance_codes = 0; 1251 uint32_t distance_postfix_bits = 0; 1252 ContextType literal_context_mode = CONTEXT_UTF8; 1253 MetaBlockSplit mb; 1254 InitMetaBlockSplit(&mb); 1255 if (!BrotliIsMostlyUTF8(input_buffer, metablock_start, mask, 1256 metablock_size, kMinUTF8Ratio)) { 1257 literal_context_mode = CONTEXT_SIGNED; 1258 } 1259 BrotliBuildMetaBlock(m, input_buffer, metablock_start, mask, ¶ms, 1260 prev_byte, prev_byte2, 1261 commands, num_commands, 1262 literal_context_mode, 1263 &mb); 1264 if (BROTLI_IS_OOM(m)) goto oom; 1265 BrotliOptimizeHistograms(num_direct_distance_codes, 1266 distance_postfix_bits, 1267 &mb); 1268 storage = BROTLI_ALLOC(m, uint8_t, 2 * metablock_size + 502); 1269 if (BROTLI_IS_OOM(m)) goto oom; 1270 storage[0] = last_byte; 1271 BrotliStoreMetaBlock(m, input_buffer, metablock_start, metablock_size, 1272 mask, prev_byte, prev_byte2, 1273 is_last, 1274 num_direct_distance_codes, 1275 distance_postfix_bits, 1276 literal_context_mode, 1277 commands, num_commands, 1278 &mb, 1279 &storage_ix, storage); 1280 if (BROTLI_IS_OOM(m)) goto oom; 1281 if (metablock_size + 4 < (storage_ix >> 3)) { 1282 /* Restore the distance cache and last byte. */ 1283 memcpy(dist_cache, saved_dist_cache, 4 * sizeof(dist_cache[0])); 1284 storage[0] = last_byte; 1285 storage_ix = last_byte_bits; 1286 BrotliStoreUncompressedMetaBlock(is_last, input_buffer, 1287 metablock_start, mask, 1288 metablock_size, &storage_ix, storage); 1289 } 1290 DestroyMetaBlockSplit(m, &mb); 1291 } 1292 last_byte = storage[storage_ix >> 3]; 1293 last_byte_bits = storage_ix & 7u; 1294 metablock_start += metablock_size; 1295 prev_byte = input_buffer[metablock_start - 1]; 1296 prev_byte2 = input_buffer[metablock_start - 2]; 1297 /* Save the state of the distance cache in case we need to restore it for 1298 emitting an uncompressed block. */ 1299 memcpy(saved_dist_cache, dist_cache, 4 * sizeof(dist_cache[0])); 1300 1301 { 1302 const size_t out_size = storage_ix >> 3; 1303 total_out_size += out_size; 1304 if (total_out_size <= max_out_size) { 1305 memcpy(encoded_buffer, storage, out_size); 1306 encoded_buffer += out_size; 1307 } else { 1308 ok = BROTLI_FALSE; 1309 } 1310 } 1311 BROTLI_FREE(m, storage); 1312 BROTLI_FREE(m, commands); 1313 } 1314 1315 *encoded_size = total_out_size; 1316 DestroyHasher(m, &hasher); 1317 return ok; 1318 1319oom: 1320 BrotliWipeOutMemoryManager(m); 1321 return BROTLI_FALSE; 1322} 1323 1324size_t BrotliEncoderMaxCompressedSize(size_t input_size) { 1325 /* [window bits / empty metadata] + N * [uncompressed] + [last empty] */ 1326 size_t num_large_blocks = input_size >> 24; 1327 size_t tail = input_size - (num_large_blocks << 24); 1328 size_t tail_overhead = (tail > (1 << 20)) ? 4 : 3; 1329 size_t overhead = 2 + (4 * num_large_blocks) + tail_overhead + 1; 1330 size_t result = input_size + overhead; 1331 if (input_size == 0) return 1; 1332 return (result < input_size) ? 0 : result; 1333} 1334 1335/* Wraps data to uncompressed brotli stream with minimal window size. 1336 |output| should point at region with at least BrotliEncoderMaxCompressedSize 1337 addressable bytes. 1338 Returns the length of stream. */ 1339static size_t MakeUncompressedStream( 1340 const uint8_t* input, size_t input_size, uint8_t* output) { 1341 size_t size = input_size; 1342 size_t result = 0; 1343 size_t offset = 0; 1344 if (input_size == 0) { 1345 output[0] = 6; 1346 return 1; 1347 } 1348 output[result++] = 0x21; /* window bits = 10, is_last = false */ 1349 output[result++] = 0x03; /* empty metadata, padding */ 1350 while (size > 0) { 1351 uint32_t nibbles = 0; 1352 uint32_t chunk_size; 1353 uint32_t bits; 1354 chunk_size = (size > (1u << 24)) ? (1u << 24) : (uint32_t)size; 1355 if (chunk_size > (1u << 16)) nibbles = (chunk_size > (1u << 20)) ? 2 : 1; 1356 bits = 1357 (nibbles << 1) | ((chunk_size - 1) << 3) | (1u << (19 + 4 * nibbles)); 1358 output[result++] = (uint8_t)bits; 1359 output[result++] = (uint8_t)(bits >> 8); 1360 output[result++] = (uint8_t)(bits >> 16); 1361 if (nibbles == 2) output[result++] = (uint8_t)(bits >> 24); 1362 memcpy(&output[result], &input[offset], chunk_size); 1363 result += chunk_size; 1364 offset += chunk_size; 1365 size -= chunk_size; 1366 } 1367 output[result++] = 3; 1368 return result; 1369} 1370 1371BROTLI_BOOL BrotliEncoderCompress( 1372 int quality, int lgwin, BrotliEncoderMode mode, size_t input_size, 1373 const uint8_t* input_buffer, size_t* encoded_size, 1374 uint8_t* encoded_buffer) { 1375 BrotliEncoderState* s; 1376 size_t out_size = *encoded_size; 1377 const uint8_t* input_start = input_buffer; 1378 uint8_t* output_start = encoded_buffer; 1379 size_t max_out_size = BrotliEncoderMaxCompressedSize(input_size); 1380 if (out_size == 0) { 1381 /* Output buffer needs at least one byte. */ 1382 return BROTLI_FALSE; 1383 } 1384 if (input_size == 0) { 1385 /* Handle the special case of empty input. */ 1386 *encoded_size = 1; 1387 *encoded_buffer = 6; 1388 return BROTLI_TRUE; 1389 } 1390 if (quality == 10) { 1391 /* TODO: Implement this direct path for all quality levels. */ 1392 const int lg_win = BROTLI_MIN(int, BROTLI_MAX_WINDOW_BITS, 1393 BROTLI_MAX(int, 16, lgwin)); 1394 int ok = BrotliCompressBufferQuality10(lg_win, input_size, input_buffer, 1395 encoded_size, encoded_buffer); 1396 if (!ok || (max_out_size && *encoded_size > max_out_size)) { 1397 goto fallback; 1398 } 1399 return BROTLI_TRUE; 1400 } 1401 1402 s = BrotliEncoderCreateInstance(0, 0, 0); 1403 if (!s) { 1404 return BROTLI_FALSE; 1405 } else { 1406 size_t available_in = input_size; 1407 const uint8_t* next_in = input_buffer; 1408 size_t available_out = *encoded_size; 1409 uint8_t* next_out = encoded_buffer; 1410 size_t total_out = 0; 1411 BROTLI_BOOL result = BROTLI_FALSE; 1412 BrotliEncoderSetParameter(s, BROTLI_PARAM_QUALITY, (uint32_t)quality); 1413 BrotliEncoderSetParameter(s, BROTLI_PARAM_LGWIN, (uint32_t)lgwin); 1414 BrotliEncoderSetParameter(s, BROTLI_PARAM_MODE, (uint32_t)mode); 1415 BrotliEncoderSetParameter(s, BROTLI_PARAM_SIZE_HINT, (uint32_t)input_size); 1416 result = BrotliEncoderCompressStream(s, BROTLI_OPERATION_FINISH, 1417 &available_in, &next_in, &available_out, &next_out, &total_out); 1418 if (!BrotliEncoderIsFinished(s)) result = 0; 1419 *encoded_size = total_out; 1420 BrotliEncoderDestroyInstance(s); 1421 if (!result || (max_out_size && *encoded_size > max_out_size)) { 1422 goto fallback; 1423 } 1424 return BROTLI_TRUE; 1425 } 1426fallback: 1427 *encoded_size = 0; 1428 if (!max_out_size) return BROTLI_FALSE; 1429 if (out_size >= max_out_size) { 1430 *encoded_size = 1431 MakeUncompressedStream(input_start, input_size, output_start); 1432 return BROTLI_TRUE; 1433 } 1434 return BROTLI_FALSE; 1435} 1436 1437static void InjectBytePaddingBlock(BrotliEncoderState* s) { 1438 uint32_t seal = s->last_byte_; 1439 size_t seal_bits = s->last_byte_bits_; 1440 uint8_t* destination; 1441 s->last_byte_ = 0; 1442 s->last_byte_bits_ = 0; 1443 /* is_last = 0, data_nibbles = 11, reserved = 0, meta_nibbles = 00 */ 1444 seal |= 0x6u << seal_bits; 1445 seal_bits += 6; 1446 /* If we have already created storage, then append to it. 1447 Storage is valid until next block is being compressed. */ 1448 if (s->next_out_) { 1449 destination = s->next_out_ + s->available_out_; 1450 } else { 1451 destination = s->tiny_buf_.u8; 1452 s->next_out_ = destination; 1453 } 1454 destination[0] = (uint8_t)seal; 1455 if (seal_bits > 8) destination[1] = (uint8_t)(seal >> 8); 1456 s->available_out_ += (seal_bits + 7) >> 3; 1457} 1458 1459/* Injects padding bits or pushes compressed data to output. 1460 Returns false if nothing is done. */ 1461static BROTLI_BOOL InjectFlushOrPushOutput(BrotliEncoderState* s, 1462 size_t* available_out, uint8_t** next_out, size_t* total_out) { 1463 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && 1464 s->last_byte_bits_ != 0) { 1465 InjectBytePaddingBlock(s); 1466 return BROTLI_TRUE; 1467 } 1468 1469 if (s->available_out_ != 0 && *available_out != 0) { 1470 size_t copy_output_size = 1471 BROTLI_MIN(size_t, s->available_out_, *available_out); 1472 memcpy(*next_out, s->next_out_, copy_output_size); 1473 *next_out += copy_output_size; 1474 *available_out -= copy_output_size; 1475 s->next_out_ += copy_output_size; 1476 s->available_out_ -= copy_output_size; 1477 s->total_out_ += copy_output_size; 1478 if (total_out) *total_out = s->total_out_; 1479 return BROTLI_TRUE; 1480 } 1481 1482 return BROTLI_FALSE; 1483} 1484 1485static void CheckFlushComplete(BrotliEncoderState* s) { 1486 if (s->stream_state_ == BROTLI_STREAM_FLUSH_REQUESTED && 1487 s->available_out_ == 0) { 1488 s->stream_state_ = BROTLI_STREAM_PROCESSING; 1489 s->next_out_ = 0; 1490 } 1491} 1492 1493static BROTLI_BOOL BrotliEncoderCompressStreamFast( 1494 BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in, 1495 const uint8_t** next_in, size_t* available_out, uint8_t** next_out, 1496 size_t* total_out) { 1497 const size_t block_size_limit = (size_t)1 << s->params.lgwin; 1498 const size_t buf_size = BROTLI_MIN(size_t, kCompressFragmentTwoPassBlockSize, 1499 BROTLI_MIN(size_t, *available_in, block_size_limit)); 1500 uint32_t* tmp_command_buf = NULL; 1501 uint32_t* command_buf = NULL; 1502 uint8_t* tmp_literal_buf = NULL; 1503 uint8_t* literal_buf = NULL; 1504 MemoryManager* m = &s->memory_manager_; 1505 if (s->params.quality != FAST_ONE_PASS_COMPRESSION_QUALITY && 1506 s->params.quality != FAST_TWO_PASS_COMPRESSION_QUALITY) { 1507 return BROTLI_FALSE; 1508 } 1509 if (s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { 1510 if (!s->command_buf_ && buf_size == kCompressFragmentTwoPassBlockSize) { 1511 s->command_buf_ = 1512 BROTLI_ALLOC(m, uint32_t, kCompressFragmentTwoPassBlockSize); 1513 s->literal_buf_ = 1514 BROTLI_ALLOC(m, uint8_t, kCompressFragmentTwoPassBlockSize); 1515 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1516 } 1517 if (s->command_buf_) { 1518 command_buf = s->command_buf_; 1519 literal_buf = s->literal_buf_; 1520 } else { 1521 tmp_command_buf = BROTLI_ALLOC(m, uint32_t, buf_size); 1522 tmp_literal_buf = BROTLI_ALLOC(m, uint8_t, buf_size); 1523 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1524 command_buf = tmp_command_buf; 1525 literal_buf = tmp_literal_buf; 1526 } 1527 } 1528 1529 while (BROTLI_TRUE) { 1530 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) { 1531 continue; 1532 } 1533 1534 /* Compress block only when internal output buffer is empty, stream is not 1535 finished, there is no pending flush request, and there is either 1536 additional input or pending operation. */ 1537 if (s->available_out_ == 0 && 1538 s->stream_state_ == BROTLI_STREAM_PROCESSING && 1539 (*available_in != 0 || op != BROTLI_OPERATION_PROCESS)) { 1540 size_t block_size = BROTLI_MIN(size_t, block_size_limit, *available_in); 1541 BROTLI_BOOL is_last = 1542 (*available_in == block_size) && (op == BROTLI_OPERATION_FINISH); 1543 BROTLI_BOOL force_flush = 1544 (*available_in == block_size) && (op == BROTLI_OPERATION_FLUSH); 1545 size_t max_out_size = 2 * block_size + 502; 1546 BROTLI_BOOL inplace = BROTLI_TRUE; 1547 uint8_t* storage = NULL; 1548 size_t storage_ix = s->last_byte_bits_; 1549 size_t table_size; 1550 int* table; 1551 1552 if (force_flush && block_size == 0) { 1553 s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; 1554 continue; 1555 } 1556 if (max_out_size <= *available_out) { 1557 storage = *next_out; 1558 } else { 1559 inplace = BROTLI_FALSE; 1560 storage = GetBrotliStorage(s, max_out_size); 1561 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1562 } 1563 storage[0] = s->last_byte_; 1564 table = GetHashTable(s, s->params.quality, block_size, &table_size); 1565 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1566 1567 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY) { 1568 BrotliCompressFragmentFast(m, *next_in, block_size, is_last, table, 1569 table_size, s->cmd_depths_, s->cmd_bits_, &s->cmd_code_numbits_, 1570 s->cmd_code_, &storage_ix, storage); 1571 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1572 } else { 1573 BrotliCompressFragmentTwoPass(m, *next_in, block_size, is_last, 1574 command_buf, literal_buf, table, table_size, 1575 &storage_ix, storage); 1576 if (BROTLI_IS_OOM(m)) return BROTLI_FALSE; 1577 } 1578 *next_in += block_size; 1579 *available_in -= block_size; 1580 if (inplace) { 1581 size_t out_bytes = storage_ix >> 3; 1582 assert(out_bytes <= *available_out); 1583 assert((storage_ix & 7) == 0 || out_bytes < *available_out); 1584 *next_out += out_bytes; 1585 *available_out -= out_bytes; 1586 s->total_out_ += out_bytes; 1587 if (total_out) *total_out = s->total_out_; 1588 } else { 1589 size_t out_bytes = storage_ix >> 3; 1590 s->next_out_ = storage; 1591 s->available_out_ = out_bytes; 1592 } 1593 s->last_byte_ = storage[storage_ix >> 3]; 1594 s->last_byte_bits_ = storage_ix & 7u; 1595 1596 if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; 1597 if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; 1598 continue; 1599 } 1600 break; 1601 } 1602 BROTLI_FREE(m, tmp_command_buf); 1603 BROTLI_FREE(m, tmp_literal_buf); 1604 CheckFlushComplete(s); 1605 return BROTLI_TRUE; 1606} 1607 1608static BROTLI_BOOL ProcessMetadata( 1609 BrotliEncoderState* s, size_t* available_in, const uint8_t** next_in, 1610 size_t* available_out, uint8_t** next_out, size_t* total_out) { 1611 if (*available_in > (1u << 24)) return BROTLI_FALSE; 1612 /* Switch to metadata block workflow, if required. */ 1613 if (s->stream_state_ == BROTLI_STREAM_PROCESSING) { 1614 s->remaining_metadata_bytes_ = (uint32_t)*available_in; 1615 s->stream_state_ = BROTLI_STREAM_METADATA_HEAD; 1616 } 1617 if (s->stream_state_ != BROTLI_STREAM_METADATA_HEAD && 1618 s->stream_state_ != BROTLI_STREAM_METADATA_BODY) { 1619 return BROTLI_FALSE; 1620 } 1621 1622 while (BROTLI_TRUE) { 1623 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) { 1624 continue; 1625 } 1626 if (s->available_out_ != 0) break; 1627 1628 if (s->input_pos_ != s->last_flush_pos_) { 1629 BROTLI_BOOL result = EncodeData(s, BROTLI_FALSE, BROTLI_TRUE, 1630 &s->available_out_, &s->next_out_); 1631 if (!result) return BROTLI_FALSE; 1632 continue; 1633 } 1634 1635 if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD) { 1636 s->next_out_ = s->tiny_buf_.u8; 1637 s->available_out_ = 1638 WriteMetadataHeader(s, s->remaining_metadata_bytes_, s->next_out_); 1639 s->stream_state_ = BROTLI_STREAM_METADATA_BODY; 1640 continue; 1641 } else { 1642 /* Exit workflow only when there is no more input and no more output. 1643 Otherwise client may continue producing empty metadata blocks. */ 1644 if (s->remaining_metadata_bytes_ == 0) { 1645 s->remaining_metadata_bytes_ = BROTLI_UINT32_MAX; 1646 s->stream_state_ = BROTLI_STREAM_PROCESSING; 1647 break; 1648 } 1649 if (*available_out) { 1650 /* Directly copy input to output. */ 1651 uint32_t copy = (uint32_t)BROTLI_MIN( 1652 size_t, s->remaining_metadata_bytes_, *available_out); 1653 memcpy(*next_out, *next_in, copy); 1654 *next_in += copy; 1655 *available_in -= copy; 1656 s->remaining_metadata_bytes_ -= copy; 1657 *next_out += copy; 1658 *available_out -= copy; 1659 } else { 1660 /* This guarantees progress in "TakeOutput" workflow. */ 1661 uint32_t copy = BROTLI_MIN(uint32_t, s->remaining_metadata_bytes_, 16); 1662 s->next_out_ = s->tiny_buf_.u8; 1663 memcpy(s->next_out_, *next_in, copy); 1664 *next_in += copy; 1665 *available_in -= copy; 1666 s->remaining_metadata_bytes_ -= copy; 1667 s->available_out_ = copy; 1668 } 1669 continue; 1670 } 1671 } 1672 1673 return BROTLI_TRUE; 1674} 1675 1676static void UpdateSizeHint(BrotliEncoderState* s, size_t available_in) { 1677 if (s->params.size_hint == 0) { 1678 uint64_t delta = UnprocessedInputSize(s); 1679 uint64_t tail = available_in; 1680 uint32_t limit = 1u << 30; 1681 uint32_t total; 1682 if ((delta >= limit) || (tail >= limit) || ((delta + tail) >= limit)) { 1683 total = limit; 1684 } else { 1685 total = (uint32_t)(delta + tail); 1686 } 1687 s->params.size_hint = total; 1688 } 1689} 1690 1691BROTLI_BOOL BrotliEncoderCompressStream( 1692 BrotliEncoderState* s, BrotliEncoderOperation op, size_t* available_in, 1693 const uint8_t** next_in, size_t* available_out,uint8_t** next_out, 1694 size_t* total_out) { 1695 if (!EnsureInitialized(s)) return BROTLI_FALSE; 1696 1697 /* Unfinished metadata block; check requirements. */ 1698 if (s->remaining_metadata_bytes_ != BROTLI_UINT32_MAX) { 1699 if (*available_in != s->remaining_metadata_bytes_) return BROTLI_FALSE; 1700 if (op != BROTLI_OPERATION_EMIT_METADATA) return BROTLI_FALSE; 1701 } 1702 1703 if (op == BROTLI_OPERATION_EMIT_METADATA) { 1704 UpdateSizeHint(s, 0); /* First data metablock might be emitted here. */ 1705 return ProcessMetadata( 1706 s, available_in, next_in, available_out, next_out, total_out); 1707 } 1708 1709 if (s->stream_state_ == BROTLI_STREAM_METADATA_HEAD || 1710 s->stream_state_ == BROTLI_STREAM_METADATA_BODY) { 1711 return BROTLI_FALSE; 1712 } 1713 1714 if (s->stream_state_ != BROTLI_STREAM_PROCESSING && *available_in != 0) { 1715 return BROTLI_FALSE; 1716 } 1717 if (s->params.quality == FAST_ONE_PASS_COMPRESSION_QUALITY || 1718 s->params.quality == FAST_TWO_PASS_COMPRESSION_QUALITY) { 1719 return BrotliEncoderCompressStreamFast(s, op, available_in, next_in, 1720 available_out, next_out, total_out); 1721 } 1722 while (BROTLI_TRUE) { 1723 size_t remaining_block_size = RemainingInputBlockSize(s); 1724 1725 if (remaining_block_size != 0 && *available_in != 0) { 1726 size_t copy_input_size = 1727 BROTLI_MIN(size_t, remaining_block_size, *available_in); 1728 CopyInputToRingBuffer(s, copy_input_size, *next_in); 1729 *next_in += copy_input_size; 1730 *available_in -= copy_input_size; 1731 continue; 1732 } 1733 1734 if (InjectFlushOrPushOutput(s, available_out, next_out, total_out)) { 1735 continue; 1736 } 1737 1738 /* Compress data only when internal output buffer is empty, stream is not 1739 finished and there is no pending flush request. */ 1740 if (s->available_out_ == 0 && 1741 s->stream_state_ == BROTLI_STREAM_PROCESSING) { 1742 if (remaining_block_size == 0 || op != BROTLI_OPERATION_PROCESS) { 1743 BROTLI_BOOL is_last = TO_BROTLI_BOOL( 1744 (*available_in == 0) && op == BROTLI_OPERATION_FINISH); 1745 BROTLI_BOOL force_flush = TO_BROTLI_BOOL( 1746 (*available_in == 0) && op == BROTLI_OPERATION_FLUSH); 1747 BROTLI_BOOL result; 1748 UpdateSizeHint(s, *available_in); 1749 result = EncodeData(s, is_last, force_flush, 1750 &s->available_out_, &s->next_out_); 1751 if (!result) return BROTLI_FALSE; 1752 if (force_flush) s->stream_state_ = BROTLI_STREAM_FLUSH_REQUESTED; 1753 if (is_last) s->stream_state_ = BROTLI_STREAM_FINISHED; 1754 continue; 1755 } 1756 } 1757 break; 1758 } 1759 CheckFlushComplete(s); 1760 return BROTLI_TRUE; 1761} 1762 1763BROTLI_BOOL BrotliEncoderIsFinished(BrotliEncoderState* s) { 1764 return TO_BROTLI_BOOL(s->stream_state_ == BROTLI_STREAM_FINISHED && 1765 !BrotliEncoderHasMoreOutput(s)); 1766} 1767 1768BROTLI_BOOL BrotliEncoderHasMoreOutput(BrotliEncoderState* s) { 1769 return TO_BROTLI_BOOL(s->available_out_ != 0); 1770} 1771 1772const uint8_t* BrotliEncoderTakeOutput(BrotliEncoderState* s, size_t* size) { 1773 size_t consumed_size = s->available_out_; 1774 uint8_t* result = s->next_out_; 1775 if (*size) { 1776 consumed_size = BROTLI_MIN(size_t, *size, s->available_out_); 1777 } 1778 if (consumed_size) { 1779 s->next_out_ += consumed_size; 1780 s->available_out_ -= consumed_size; 1781 s->total_out_ += consumed_size; 1782 CheckFlushComplete(s); 1783 *size = consumed_size; 1784 } else { 1785 *size = 0; 1786 result = 0; 1787 } 1788 return result; 1789} 1790 1791uint32_t BrotliEncoderVersion(void) { 1792 return BROTLI_VERSION; 1793} 1794 1795 1796/* DEPRECATED >>> */ 1797size_t BrotliEncoderInputBlockSize(BrotliEncoderState* s) { 1798 return InputBlockSize(s); 1799} 1800void BrotliEncoderCopyInputToRingBuffer(BrotliEncoderState* s, 1801 const size_t input_size, 1802 const uint8_t* input_buffer) { 1803 CopyInputToRingBuffer(s, input_size, input_buffer); 1804} 1805BROTLI_BOOL BrotliEncoderWriteData( 1806 BrotliEncoderState* s, const BROTLI_BOOL is_last, 1807 const BROTLI_BOOL force_flush, size_t* out_size, uint8_t** output) { 1808 return EncodeData(s, is_last, force_flush, out_size, output); 1809} 1810/* <<< DEPRECATED */ 1811 1812#if defined(__cplusplus) || defined(c_plusplus) 1813} /* extern "C" */ 1814#endif 1815