179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Copyright 2013 Google Inc. All Rights Reserved.
279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Licensed under the Apache License, Version 2.0 (the "License");
479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// you may not use this file except in compliance with the License.
579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// You may obtain a copy of the License at
679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// http://www.apache.org/licenses/LICENSE-2.0
879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Unless required by applicable law or agreed to in writing, software
1079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// distributed under the License is distributed on an "AS IS" BASIS,
1179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// See the License for the specific language governing permissions and
1379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// limitations under the License.
1479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
1579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Implementation of Brotli compressor.
1679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
1779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./encode.h"
1879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
1979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <algorithm>
2079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <limits>
2179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./backward_references.h"
2379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./bit_cost.h"
2479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./block_splitter.h"
2579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./cluster.h"
2679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./context.h"
272733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadka#include "./transform.h"
2879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./entropy_encode.h"
2979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./fast_log.h"
301571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka#include "./hash.h"
3179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./histogram.h"
321571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka#include "./literal_cost.h"
3379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./prefix.h"
3479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./write_bits.h"
3579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
3679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkanamespace brotli {
3779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
38437bbad37074e472b66d427814275de84ca77f19Roderick Sheeterstatic const int kWindowBits = 22;
39437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter// To make decoding faster, we allow the decoder to write 16 bytes ahead in
40437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter// its ringbuffer, therefore the encoder has to decrease max distance by this
41437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter// amount.
42437bbad37074e472b66d427814275de84ca77f19Roderick Sheeterstatic const int kDecoderRingBufferWriteAheadSlack = 16;
43437bbad37074e472b66d427814275de84ca77f19Roderick Sheeterstatic const int kMaxBackwardDistance =
44437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter    (1 << kWindowBits) - kDecoderRingBufferWriteAheadSlack;
45437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter
46437bbad37074e472b66d427814275de84ca77f19Roderick Sheeterstatic const int kMetaBlockSizeBits = 21;
47437bbad37074e472b66d427814275de84ca77f19Roderick Sheeterstatic const int kRingBufferBits = 23;
48437bbad37074e472b66d427814275de84ca77f19Roderick Sheeterstatic const int kRingBufferMask = (1 << kRingBufferBits) - 1;
49437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter
5079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize>
5179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkadouble Entropy(const std::vector<Histogram<kSize> >& histograms) {
5279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  double retval = 0;
5379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < histograms.size(); ++i) {
5479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    retval += histograms[i].EntropyBitCost();
5579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
5679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return retval;
5779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
5879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
591571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadkatemplate<int kSize>
601571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadkadouble TotalBitCost(const std::vector<Histogram<kSize> >& histograms) {
611571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  double retval = 0;
621571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  for (int i = 0; i < histograms.size(); ++i) {
631571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    retval += PopulationCost(histograms[i]);
641571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  }
651571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  return retval;
661571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka}
671571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
68e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadkavoid EncodeVarLenUint8(int n, int* storage_ix, uint8_t* storage) {
69e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (n == 0) {
70e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    WriteBits(1, 0, storage_ix, storage);
71e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  } else {
72e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    WriteBits(1, 1, storage_ix, storage);
73e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    int nbits = Log2Floor(n);
74e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    WriteBits(3, nbits, storage_ix, storage);
75e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    if (nbits > 0) {
76e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      WriteBits(nbits, n - (1 << nbits), storage_ix, storage);
77e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    }
7879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
7979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
8079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
81cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkaint ParseAsUTF8(int* symbol, const uint8_t* input, int size) {
82cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  // ASCII
83cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  if ((input[0] & 0x80) == 0) {
84cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    *symbol = input[0];
85cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    if (*symbol > 0) {
86cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      return 1;
87cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    }
88cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
89cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  // 2-byte UTF8
90cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  if (size > 1 &&
91cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[0] & 0xe0) == 0xc0 &&
92cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[1] & 0xc0) == 0x80) {
93cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    *symbol = (((input[0] & 0x1f) << 6) |
94cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka               (input[1] & 0x3f));
95cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    if (*symbol > 0x7f) {
96cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      return 2;
97cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    }
98cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
99cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  // 3-byte UFT8
100cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  if (size > 2 &&
101cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[0] & 0xf0) == 0xe0 &&
102cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[1] & 0xc0) == 0x80 &&
103cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[2] & 0xc0) == 0x80) {
104cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    *symbol = (((input[0] & 0x0f) << 12) |
105cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka               ((input[1] & 0x3f) << 6) |
106cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka               (input[2] & 0x3f));
107cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    if (*symbol > 0x7ff) {
108cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      return 3;
109cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    }
110cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
111cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  // 4-byte UFT8
112cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  if (size > 3 &&
113cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[0] & 0xf8) == 0xf0 &&
114cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[1] & 0xc0) == 0x80 &&
115cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[2] & 0xc0) == 0x80 &&
116cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      (input[3] & 0xc0) == 0x80) {
117cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    *symbol = (((input[0] & 0x07) << 18) |
118cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka               ((input[1] & 0x3f) << 12) |
119cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka               ((input[2] & 0x3f) << 6) |
120cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka               (input[3] & 0x3f));
121cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    if (*symbol > 0xffff && *symbol <= 0x10ffff) {
122cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      return 4;
123cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    }
124cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
125cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  // Not UTF8, emit a special symbol above the UTF8-code space
126cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  *symbol = 0x110000 | input[0];
127cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  return 1;
128cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka}
129cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka
130cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka// Returns true if at least min_fraction of the data is UTF8-encoded.
131cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkabool IsMostlyUTF8(const uint8_t* data, size_t length, double min_fraction) {
132cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  size_t size_utf8 = 0;
133cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  size_t pos = 0;
134cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  while (pos < length) {
135cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    int symbol;
136cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    int bytes_read = ParseAsUTF8(&symbol, data + pos, length - pos);
137cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    pos += bytes_read;
138cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    if (symbol < 0x110000) size_utf8 += bytes_read;
139cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
140cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  return size_utf8 > min_fraction * length;
141cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka}
142cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka
1431571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadkavoid EncodeMetaBlockLength(size_t meta_block_size,
144e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka                           bool is_last,
145e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka                           bool is_uncompressed,
14679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                           int* storage_ix, uint8_t* storage) {
147e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  WriteBits(1, is_last, storage_ix, storage);
148e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (is_last) {
149e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    if (meta_block_size == 0) {
150e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      WriteBits(1, 1, storage_ix, storage);
151e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      return;
152e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    }
153e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    WriteBits(1, 0, storage_ix, storage);
154e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  }
155e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  --meta_block_size;
1561571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  int num_bits = Log2Floor(meta_block_size) + 1;
157fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka  if (num_bits < 16) {
158fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka    num_bits = 16;
159fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka  }
160fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka  WriteBits(2, (num_bits - 13) >> 2, storage_ix, storage);
1611571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  while (num_bits > 0) {
1621571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    WriteBits(4, meta_block_size & 0xf, storage_ix, storage);
1631571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    meta_block_size >>= 4;
1641571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    num_bits -= 4;
16579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
166e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (!is_last) {
167e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    WriteBits(1, is_uncompressed, storage_ix, storage);
168e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  }
16979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
17079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
17179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid StoreHuffmanTreeOfHuffmanTreeToBitMask(
17279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const uint8_t* code_length_bitdepth,
17379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int* storage_ix, uint8_t* storage) {
17479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  static const uint8_t kStorageOrder[kCodeLengthCodes] = {
175cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15,
17679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  };
17779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // Throw away trailing zeros:
17879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int codes_to_store = kCodeLengthCodes;
179b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka  for (; codes_to_store > 0; --codes_to_store) {
18079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (code_length_bitdepth[kStorageOrder[codes_to_store - 1]] != 0) {
18179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      break;
18279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
18379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
184b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka  int num_codes = 0;
185b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka  for (int i = 0; i < codes_to_store; ++i) {
186b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka    if (code_length_bitdepth[kStorageOrder[i]] != 0) {
187b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka      ++num_codes;
188b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka    }
189b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka  }
190b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka  if (num_codes == 1) {
191b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka    codes_to_store = kCodeLengthCodes;
192b89f3be40b69a01ce71e7fe62d1609886ed943aaZoltan Szabadka  }
1934c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka  int skip_some = 0;  // skips none.
1944c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka  if (code_length_bitdepth[kStorageOrder[0]] == 0 &&
1954c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka      code_length_bitdepth[kStorageOrder[1]] == 0) {
1964c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka    skip_some = 2;  // skips two.
1974c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka    if (code_length_bitdepth[kStorageOrder[2]] == 0) {
1984c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka      skip_some = 3;  // skips three.
1994c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka    }
2004c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka  }
2014c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka  WriteBits(2, skip_some, storage_ix, storage);
2024c8c7fd31c6a9e03c6531b8ddc34fd071f6c9348Zoltan Szabadka  for (int i = skip_some; i < codes_to_store; ++i) {
2031571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    uint8_t len[] = { 2, 4, 3, 2, 2, 4 };
204cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    uint8_t bits[] = { 0, 7, 3, 2, 1, 15 };
2051571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    int v = code_length_bitdepth[kStorageOrder[i]];
2061571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    WriteBits(len[v], bits[v], storage_ix, storage);
20779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
20879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
20979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
21079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid StoreHuffmanTreeToBitMask(
21179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const uint8_t* huffman_tree,
21279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const uint8_t* huffman_tree_extra_bits,
21379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const int huffman_tree_size,
21479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const EntropyCode<kCodeLengthCodes>& entropy,
21579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int* storage_ix, uint8_t* storage) {
21679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < huffman_tree_size; ++i) {
21779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const int ix = huffman_tree[i];
21879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const int extra_bits = huffman_tree_extra_bits[i];
219278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    if (entropy.count_ > 1) {
220278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      WriteBits(entropy.depth_[ix], entropy.bits_[ix], storage_ix, storage);
221278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    }
22279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    switch (ix) {
22379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      case 16:
22479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        WriteBits(2, extra_bits, storage_ix, storage);
22579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        break;
22679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      case 17:
22779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        WriteBits(3, extra_bits, storage_ix, storage);
22879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        break;
22979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
23079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
23179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
23279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
23379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize>
234cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkavoid StoreHuffmanCodeSimple(
235cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    const EntropyCode<kSize>& code, int alphabet_size,
236278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    int max_bits, int* storage_ix, uint8_t* storage) {
2371571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  const uint8_t *depth = &code.depth_[0];
238cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  int symbols[4];
239cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  // Quadratic sort.
240cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  int k, j;
241cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  for (k = 0; k < code.count_; ++k) {
242cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    symbols[k] = code.symbols_[k];
243cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
244cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  for (k = 0; k < code.count_; ++k) {
245cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    for (j = k + 1; j < code.count_; ++j) {
246cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      if (depth[symbols[j]] < depth[symbols[k]]) {
247cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka        int t = symbols[k];
248cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka        symbols[k] = symbols[j];
249cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka        symbols[j] = t;
2501571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      }
2511571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    }
252cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
253cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  // Small tree marker to encode 1-4 symbols.
254cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  WriteBits(2, 1, storage_ix, storage);
255cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  WriteBits(2, code.count_ - 1, storage_ix, storage);
256cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  for (int i = 0; i < code.count_; ++i) {
257cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    WriteBits(max_bits, symbols[i], storage_ix, storage);
258cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
259cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  if (code.count_ == 4) {
260cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    if (depth[symbols[0]] == 2 &&
261cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka        depth[symbols[1]] == 2 &&
262cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka        depth[symbols[2]] == 2 &&
263cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka        depth[symbols[3]] == 2) {
264cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      WriteBits(1, 0, storage_ix, storage);
265cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    } else {
266cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      WriteBits(1, 1, storage_ix, storage);
26779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
26879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
269cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka}
270cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka
271cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkatemplate<int kSize>
272cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkavoid StoreHuffmanCodeComplex(
273cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    const EntropyCode<kSize>& code, int alphabet_size,
274cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    int* storage_ix, uint8_t* storage) {
275cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  const uint8_t *depth = &code.depth_[0];
27679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  uint8_t huffman_tree[kSize];
27779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  uint8_t huffman_tree_extra_bits[kSize];
27879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int huffman_tree_size = 0;
2791571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  WriteHuffmanTree(depth,
28079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   alphabet_size,
28179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   &huffman_tree[0],
28279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   &huffman_tree_extra_bits[0],
28379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   &huffman_tree_size);
28479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  Histogram<kCodeLengthCodes> huffman_tree_histogram;
28579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  memset(huffman_tree_histogram.data_, 0, sizeof(huffman_tree_histogram.data_));
28679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < huffman_tree_size; ++i) {
28779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    huffman_tree_histogram.Add(huffman_tree[i]);
28879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
28979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  EntropyCode<kCodeLengthCodes> huffman_tree_entropy;
2901571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  BuildEntropyCode(huffman_tree_histogram, 5, kCodeLengthCodes,
29179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   &huffman_tree_entropy);
29279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  StoreHuffmanTreeOfHuffmanTreeToBitMask(
29379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      &huffman_tree_entropy.depth_[0], storage_ix, storage);
29479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  StoreHuffmanTreeToBitMask(&huffman_tree[0], &huffman_tree_extra_bits[0],
29579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                            huffman_tree_size, huffman_tree_entropy,
29679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                            storage_ix, storage);
29779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
29879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
299cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkatemplate<int kSize>
300278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadkavoid BuildAndStoreEntropyCode(const Histogram<kSize>& histogram,
301278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                              const int tree_limit,
302278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                              const int alphabet_size,
303278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                              EntropyCode<kSize>* code,
304278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                              int* storage_ix, uint8_t* storage) {
305278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  memset(code->depth_, 0, sizeof(code->depth_));
306278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  memset(code->bits_, 0, sizeof(code->bits_));
307278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  memset(code->symbols_, 0, sizeof(code->symbols_));
308278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  code->count_ = 0;
309278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
310cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  int max_bits_counter = alphabet_size - 1;
311cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  int max_bits = 0;
312cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  while (max_bits_counter) {
313cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    max_bits_counter >>= 1;
314cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    ++max_bits;
315cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
316278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
317278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  for (size_t i = 0; i < alphabet_size; i++) {
318278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    if (histogram.data_[i] > 0) {
319278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      if (code->count_ < 4) code->symbols_[code->count_] = i;
320278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      ++code->count_;
321278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    }
322278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  }
323278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
324278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  if (code->count_ <= 1) {
325278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    WriteBits(2, 1, storage_ix, storage);
326278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    WriteBits(2, 0, storage_ix, storage);
327278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    WriteBits(max_bits, code->symbols_[0], storage_ix, storage);
328278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    return;
329278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  }
330278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
331278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  if (alphabet_size >= 50 && code->count_ >= 16) {
332278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    std::vector<int> counts(alphabet_size);
333278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    memcpy(&counts[0], histogram.data_, sizeof(counts[0]) * alphabet_size);
334278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    OptimizeHuffmanCountsForRle(alphabet_size, &counts[0]);
335278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    CreateHuffmanTree(&counts[0], alphabet_size, tree_limit, code->depth_);
336cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  } else {
337278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    CreateHuffmanTree(histogram.data_, alphabet_size, tree_limit, code->depth_);
338278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  }
339278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  ConvertBitDepthsToSymbols(code->depth_, alphabet_size, code->bits_);
340278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
341278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  if (code->count_ <= 4) {
342278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    StoreHuffmanCodeSimple(*code, alphabet_size, max_bits, storage_ix, storage);
343278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  } else {
344278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    StoreHuffmanCodeComplex(*code, alphabet_size, storage_ix, storage);
345cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  }
346cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka}
347cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka
34879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize>
349278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadkavoid BuildAndStoreEntropyCodes(
350278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    const std::vector<Histogram<kSize> >& histograms,
351278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    int alphabet_size,
352278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    std::vector<EntropyCode<kSize> >* entropy_codes,
353278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    int* storage_ix, uint8_t* storage) {
354278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  entropy_codes->resize(histograms.size());
355278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  for (int i = 0; i < histograms.size(); ++i) {
356278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    BuildAndStoreEntropyCode(histograms[i], 15, alphabet_size,
357278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                             &(*entropy_codes)[i],
358278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                             storage_ix, storage);
35979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
36079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
36179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
36279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid EncodeCommand(const Command& cmd,
36379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   const EntropyCodeCommand& entropy,
36479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   int* storage_ix, uint8_t* storage) {
36579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int code = cmd.command_prefix_;
366278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
36779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (code >= 128) {
36879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    code -= 128;
36979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
37079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int insert_extra_bits = InsertLengthExtraBits(code);
37179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  uint64_t insert_extra_bits_val =
37279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      cmd.insert_length_ - InsertLengthOffset(code);
37379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int copy_extra_bits = CopyLengthExtraBits(code);
374437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter  uint64_t copy_extra_bits_val = cmd.copy_length_code_ - CopyLengthOffset(code);
37579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (insert_extra_bits > 0) {
37679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    WriteBits(insert_extra_bits, insert_extra_bits_val, storage_ix, storage);
37779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
37879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (copy_extra_bits > 0) {
37979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    WriteBits(copy_extra_bits, copy_extra_bits_val, storage_ix, storage);
38079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
38179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
38279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
38379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid EncodeCopyDistance(const Command& cmd, const EntropyCodeDistance& entropy,
38479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                        int* storage_ix, uint8_t* storage) {
38579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int code = cmd.distance_prefix_;
38679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int extra_bits = cmd.distance_extra_bits_;
38779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  uint64_t extra_bits_val = cmd.distance_extra_bits_value_;
388278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  WriteBits(entropy.depth_[code], entropy.bits_[code], storage_ix, storage);
38979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (extra_bits > 0) {
39079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    WriteBits(extra_bits, extra_bits_val, storage_ix, storage);
39179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
39279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
39379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
3941571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadkavoid ComputeDistanceShortCodes(std::vector<Command>* cmds,
395278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                               size_t pos,
396278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                               const size_t max_backward,
3971571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                               int* dist_ringbuffer,
3981571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                               size_t* ringbuffer_idx) {
39979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  static const int kIndexOffset[16] = {
40079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    3, 2, 1, 0, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2
40179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  };
40279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  static const int kValueOffset[16] = {
40379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    0, 0, 0, 0, -1, 1, -2, 2, -3, 3, -1, 1, -2, 2, -3, 3
40479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  };
40579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < cmds->size(); ++i) {
406278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    pos += (*cmds)[i].insert_length_;
407278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    size_t max_distance = std::min(pos, max_backward);
40879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int cur_dist = (*cmds)[i].copy_distance_;
40979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int dist_code = cur_dist + 16;
410278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    if (cur_dist <= max_distance) {
411278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      if (cur_dist == 0) break;
412278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      int limits[16] = { 0, 0, 0, 0,
413278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                         6, 6, 11, 11,
414278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                         11, 11, 11, 11,
415278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                         12, 12, 12, 12 };
416278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      for (int k = 0; k < 16; ++k) {
417278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        // Only accept more popular choices.
418278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        if (cur_dist < limits[k]) {
419278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          // Typically unpopular ranges, don't replace a short distance
420278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          // with them.
421278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          continue;
422278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        }
423278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        int comp = (dist_ringbuffer[(*ringbuffer_idx + kIndexOffset[k]) & 3] +
424278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                    kValueOffset[k]);
425278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        if (cur_dist == comp) {
426278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          dist_code = k + 1;
427278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          break;
428278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        }
42979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      }
430278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      if (dist_code > 1) {
431278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        dist_ringbuffer[*ringbuffer_idx & 3] = cur_dist;
432278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        ++(*ringbuffer_idx);
43379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      }
434278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      pos += (*cmds)[i].copy_length_;
435278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    } else {
436278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      int word_idx = cur_dist - max_distance - 1;
437278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      const std::string word =
438278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          GetTransformedDictionaryWord((*cmds)[i].copy_length_code_, word_idx);
439278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      pos += word.size();
44079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
44179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    (*cmds)[i].distance_code_ = dist_code;
44279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
44379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
44479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
44579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid ComputeCommandPrefixes(std::vector<Command>* cmds,
44679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                            int num_direct_distance_codes,
44779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                            int distance_postfix_bits) {
44879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < cmds->size(); ++i) {
44979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    Command* cmd = &(*cmds)[i];
45079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    cmd->command_prefix_ = CommandPrefix(cmd->insert_length_,
451437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter                                         cmd->copy_length_code_);
452437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter    if (cmd->copy_length_code_ > 0) {
45379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      PrefixEncodeCopyDistance(cmd->distance_code_,
45479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               num_direct_distance_codes,
45579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               distance_postfix_bits,
45679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               &cmd->distance_prefix_,
45779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               &cmd->distance_extra_bits_,
45879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               &cmd->distance_extra_bits_value_);
45979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
46079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (cmd->command_prefix_ < 128 && cmd->distance_prefix_ == 0) {
46179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      cmd->distance_prefix_ = 0xffff;
46279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    } else {
46379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      cmd->command_prefix_ += 128;
46479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
46579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
46679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
46779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
46879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkaint IndexOf(const std::vector<int>& v, int value) {
46979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < v.size(); ++i) {
47079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (v[i] == value) return i;
47179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
47279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return -1;
47379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
47479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
47579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid MoveToFront(std::vector<int>* v, int index) {
47679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int value = (*v)[index];
47779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = index; i > 0; --i) {
47879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    (*v)[i] = (*v)[i - 1];
47979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
48079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  (*v)[0] = value;
48179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
48279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
48379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastd::vector<int> MoveToFrontTransform(const std::vector<int>& v) {
48479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (v.empty()) return v;
48579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<int> mtf(*max_element(v.begin(), v.end()) + 1);
48679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < mtf.size(); ++i) mtf[i] = i;
48779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<int> result(v.size());
48879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < v.size(); ++i) {
48979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int index = IndexOf(mtf, v[i]);
49079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    result[i] = index;
49179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    MoveToFront(&mtf, index);
49279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
49379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return result;
49479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
49579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
49679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Finds runs of zeros in v_in and replaces them with a prefix code of the run
49779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// length plus extra bits in *v_out and *extra_bits. Non-zero values in v_in are
49879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// shifted by *max_length_prefix. Will not create prefix codes bigger than the
49979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// initial value of *max_run_length_prefix. The prefix code of run length L is
50079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// simply Log2Floor(L) and the number of extra bits is the same as the prefix
50179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// code.
50279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid RunLengthCodeZeros(const std::vector<int>& v_in,
50379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                        int* max_run_length_prefix,
50479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                        std::vector<int>* v_out,
50579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                        std::vector<int>* extra_bits) {
50679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int max_reps = 0;
50779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < v_in.size();) {
50879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    for (; i < v_in.size() && v_in[i] != 0; ++i) ;
50979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int reps = 0;
51079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    for (; i < v_in.size() && v_in[i] == 0; ++i) {
51179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      ++reps;
51279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
51379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    max_reps = std::max(reps, max_reps);
51479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
51579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int max_prefix = max_reps > 0 ? Log2Floor(max_reps) : 0;
51679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  *max_run_length_prefix = std::min(max_prefix, *max_run_length_prefix);
51779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < v_in.size();) {
51879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (v_in[i] != 0) {
51979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      v_out->push_back(v_in[i] + *max_run_length_prefix);
52079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      extra_bits->push_back(0);
52179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      ++i;
52279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    } else {
52379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      int reps = 1;
52479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      for (uint32_t k = i + 1; k < v_in.size() && v_in[k] == 0; ++k) {
52579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        ++reps;
52679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      }
52779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      i += reps;
52879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      while (reps) {
52979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        if (reps < (2 << *max_run_length_prefix)) {
53079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          int run_length_prefix = Log2Floor(reps);
53179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          v_out->push_back(run_length_prefix);
53279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          extra_bits->push_back(reps - (1 << run_length_prefix));
53379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          break;
53479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        } else {
53579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          v_out->push_back(*max_run_length_prefix);
53679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          extra_bits->push_back((1 << *max_run_length_prefix) - 1);
53779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka          reps -= (2 << *max_run_length_prefix) - 1;
53879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka        }
53979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      }
54079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
54179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
54279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
54379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
54479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Returns a maximum zero-run-length-prefix value such that run-length coding
54579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// zeros in v with this maximum prefix value and then encoding the resulting
54679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// histogram and entropy-coding v produces the least amount of bits.
54779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkaint BestMaxZeroRunLengthPrefix(const std::vector<int>& v) {
54879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int min_cost = std::numeric_limits<int>::max();
54979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int best_max_prefix = 0;
55079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int max_prefix = 0; max_prefix <= 16; ++max_prefix) {
55179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    std::vector<int> rle_symbols;
55279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    std::vector<int> extra_bits;
55379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int max_run_length_prefix = max_prefix;
55479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    RunLengthCodeZeros(v, &max_run_length_prefix, &rle_symbols, &extra_bits);
55579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (max_run_length_prefix < max_prefix) break;
55605b3775ecbd31f3a8a4f9ff5a3afe72a926db447Zoltan Szabadka    HistogramContextMap histogram;
55779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    for (int i = 0; i < rle_symbols.size(); ++i) {
55879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      histogram.Add(rle_symbols[i]);
55979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
56079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int bit_cost = PopulationCost(histogram);
56179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (max_prefix > 0) {
56279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      bit_cost += 4;
56379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
56479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    for (int i = 1; i <= max_prefix; ++i) {
56579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      bit_cost += histogram.data_[i] * i;  // extra bits
56679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
56779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (bit_cost < min_cost) {
56879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      min_cost = bit_cost;
56979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      best_max_prefix = max_prefix;
57079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
57179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
57279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return best_max_prefix;
57379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
57479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
57579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid EncodeContextMap(const std::vector<int>& context_map,
57679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      int num_clusters,
57779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      int* storage_ix, uint8_t* storage) {
578e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  EncodeVarLenUint8(num_clusters - 1, storage_ix, storage);
57979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
580437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter  if (num_clusters == 1) {
58179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    return;
58279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
58379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
58479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<int> transformed_symbols = MoveToFrontTransform(context_map);
58579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<int> rle_symbols;
58679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<int> extra_bits;
58779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int max_run_length_prefix = BestMaxZeroRunLengthPrefix(transformed_symbols);
58879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix,
58979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                     &rle_symbols, &extra_bits);
590e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  HistogramContextMap symbol_histogram;
59179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < rle_symbols.size(); ++i) {
59279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    symbol_histogram.Add(rle_symbols[i]);
59379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
59479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  bool use_rle = max_run_length_prefix > 0;
59579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  WriteBits(1, use_rle, storage_ix, storage);
59679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (use_rle) {
59779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    WriteBits(4, max_run_length_prefix - 1, storage_ix, storage);
59879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
599278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  EntropyCodeContextMap symbol_code;
600278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  BuildAndStoreEntropyCode(symbol_histogram, 15,
601278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                           num_clusters + max_run_length_prefix,
602278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                           &symbol_code,
603278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                           storage_ix, storage);
60479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < rle_symbols.size(); ++i) {
605278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    WriteBits(symbol_code.depth_[rle_symbols[i]],
606278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka              symbol_code.bits_[rle_symbols[i]],
607278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka              storage_ix, storage);
60879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) {
60979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage);
61079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
61179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
61279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  WriteBits(1, 1, storage_ix, storage);  // use move-to-front
61379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
61479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
61579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastruct BlockSplitCode {
616e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  EntropyCodeBlockType block_type_code;
61779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  EntropyCodeBlockLength block_len_code;
61879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka};
61979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
62079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid EncodeBlockLength(const EntropyCodeBlockLength& entropy,
62179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                       int length,
62279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                       int* storage_ix, uint8_t* storage) {
62379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int len_code = BlockLengthPrefix(length);
62479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int extra_bits = BlockLengthExtraBits(len_code);
62579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int extra_bits_value = length - BlockLengthOffset(len_code);
626278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  WriteBits(entropy.depth_[len_code], entropy.bits_[len_code],
627278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka            storage_ix, storage);
62879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (extra_bits > 0) {
62979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    WriteBits(extra_bits, extra_bits_value, storage_ix, storage);
63079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
63179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
63279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
63379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid ComputeBlockTypeShortCodes(BlockSplit* split) {
63479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (split->num_types_ <= 1) {
63579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    split->num_types_ = 1;
63679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    return;
63779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
63879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int ringbuffer[2] = { 0, 1 };
63979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  size_t index = 0;
64079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < split->types_.size(); ++i) {
64179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int type = split->types_[i];
64279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int type_code;
64379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (type == ringbuffer[index & 1]) {
64479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      type_code = 0;
64579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    } else if (type == ringbuffer[(index - 1) & 1] + 1) {
64679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      type_code = 1;
64779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    } else {
64879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      type_code = type + 2;
64979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
65079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    ringbuffer[index & 1] = type;
65179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    ++index;
65279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    split->type_codes_.push_back(type_code);
65379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
65479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
65579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
65679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid BuildAndEncodeBlockSplitCode(const BlockSplit& split,
65779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                                  BlockSplitCode* code,
65879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                                  int* storage_ix, uint8_t* storage) {
659e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  EncodeVarLenUint8(split.num_types_ - 1, storage_ix, storage);
660278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
661e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (split.num_types_ == 1) {
66279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    return;
66379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
664fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka
665e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  HistogramBlockType type_histo;
666278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  for (int i = 1; i < split.type_codes_.size(); ++i) {
66779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    type_histo.Add(split.type_codes_[i]);
66879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
66979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  HistogramBlockLength length_histo;
67079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < split.lengths_.size(); ++i) {
67179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    length_histo.Add(BlockLengthPrefix(split.lengths_[i]));
67279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
673278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  BuildAndStoreEntropyCode(type_histo, 15, split.num_types_ + 2,
674278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                           &code->block_type_code,
675278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                           storage_ix, storage);
676278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  BuildAndStoreEntropyCode(length_histo, 15, kNumBlockLenPrefixes,
677278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                           &code->block_len_code,
678278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                           storage_ix, storage);
67979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  EncodeBlockLength(code->block_len_code, split.lengths_[0],
68079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                    storage_ix, storage);
68179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
68279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
68379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid MoveAndEncode(const BlockSplitCode& code,
68479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   BlockSplitIterator* it,
68579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                   int* storage_ix, uint8_t* storage) {
68679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (it->length_ == 0) {
68779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    ++it->idx_;
68879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    it->type_ = it->split_.types_[it->idx_];
68979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    it->length_ = it->split_.lengths_[it->idx_];
690e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    int type_code = it->split_.type_codes_[it->idx_];
691278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    WriteBits(code.block_type_code.depth_[type_code],
692278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka              code.block_type_code.bits_[type_code],
693278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka              storage_ix, storage);
69479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    EncodeBlockLength(code.block_len_code, it->length_, storage_ix, storage);
69579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
69679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  --it->length_;
69779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
69879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
69979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastruct EncodingParams {
70079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int num_direct_distance_codes;
70179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int distance_postfix_bits;
70279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int literal_context_mode;
70379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka};
70479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
70579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastruct MetaBlock {
70679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<Command> cmds;
70779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  EncodingParams params;
70879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplit literal_split;
70979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplit command_split;
71079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplit distance_split;
7111571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  std::vector<int> literal_context_modes;
71279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<int> literal_context_map;
71379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<int> distance_context_map;
71479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<HistogramLiteral> literal_histograms;
71579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<HistogramCommand> command_histograms;
71679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<HistogramDistance> distance_histograms;
71779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka};
71879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
71979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid BuildMetaBlock(const EncodingParams& params,
72079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                    const std::vector<Command>& cmds,
7211571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    const uint8_t* ringbuffer,
7221571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    const size_t pos,
7231571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    const size_t mask,
72479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                    MetaBlock* mb) {
72579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  mb->cmds = cmds;
72679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  mb->params = params;
727e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (cmds.empty()) {
728e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    return;
729e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  }
73079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  ComputeCommandPrefixes(&mb->cmds,
73179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                         mb->params.num_direct_distance_codes,
73279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                         mb->params.distance_postfix_bits);
73379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  SplitBlock(mb->cmds,
7341571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka             &ringbuffer[pos & mask],
73579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka             &mb->literal_split,
73679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka             &mb->command_split,
73779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka             &mb->distance_split);
73879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  ComputeBlockTypeShortCodes(&mb->literal_split);
73979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  ComputeBlockTypeShortCodes(&mb->command_split);
74079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  ComputeBlockTypeShortCodes(&mb->distance_split);
74179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
7421571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  mb->literal_context_modes.resize(mb->literal_split.num_types_,
7431571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                                   mb->params.literal_context_mode);
7441571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
7451571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
74679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int num_literal_contexts =
7471571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      mb->literal_split.num_types_ << kLiteralContextBits;
74879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int num_distance_contexts =
7491571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      mb->distance_split.num_types_ << kDistanceContextBits;
75079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<HistogramLiteral> literal_histograms(num_literal_contexts);
75179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  mb->command_histograms.resize(mb->command_split.num_types_);
75279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<HistogramDistance> distance_histograms(num_distance_contexts);
75379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BuildHistograms(mb->cmds,
75479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                  mb->literal_split,
75579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                  mb->command_split,
75679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                  mb->distance_split,
7571571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                  ringbuffer,
75879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                  pos,
7591571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                  mask,
7601571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                  mb->literal_context_modes,
76179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                  &literal_histograms,
76279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                  &mb->command_histograms,
76379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                  &distance_histograms);
76479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
765e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  // Histogram ids need to fit in one byte.
766e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  static const int kMaxNumberOfHistograms = 256;
76779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
76879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  mb->literal_histograms = literal_histograms;
7691571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  ClusterHistograms(literal_histograms,
7701571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    1 << kLiteralContextBits,
7711571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    mb->literal_split.num_types_,
7721571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    kMaxNumberOfHistograms,
7731571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    &mb->literal_histograms,
7741571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    &mb->literal_context_map);
77579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
77679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  mb->distance_histograms = distance_histograms;
7771571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  ClusterHistograms(distance_histograms,
7781571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    1 << kDistanceContextBits,
7791571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    mb->distance_split.num_types_,
7801571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    kMaxNumberOfHistograms,
7811571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    &mb->distance_histograms,
7821571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    &mb->distance_context_map);
78379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
78479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
78579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkasize_t MetaBlockLength(const std::vector<Command>& cmds) {
78679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  size_t length = 0;
78779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < cmds.size(); ++i) {
78879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const Command& cmd = cmds[i];
78979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    length += cmd.insert_length_ + cmd.copy_length_;
79079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
79179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return length;
79279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
79379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
79479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid StoreMetaBlock(const MetaBlock& mb,
795e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka                    const bool is_last,
7961571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    const uint8_t* ringbuffer,
7971571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                    const size_t mask,
79879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                    size_t* pos,
79979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                    int* storage_ix, uint8_t* storage) {
80079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  size_t length = MetaBlockLength(mb.cmds);
80179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  const size_t end_pos = *pos + length;
802278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  EncodeMetaBlockLength(length, is_last, false, storage_ix, storage);
803278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
804e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (length == 0) {
805e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    return;
806e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  }
80779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplitCode literal_split_code;
80879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplitCode command_split_code;
80979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplitCode distance_split_code;
81079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BuildAndEncodeBlockSplitCode(mb.literal_split, &literal_split_code,
81179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               storage_ix, storage);
81279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BuildAndEncodeBlockSplitCode(mb.command_split, &command_split_code,
81379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               storage_ix, storage);
81479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BuildAndEncodeBlockSplitCode(mb.distance_split, &distance_split_code,
81579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                               storage_ix, storage);
81679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  WriteBits(2, mb.params.distance_postfix_bits, storage_ix, storage);
81779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  WriteBits(4,
81879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka            mb.params.num_direct_distance_codes >>
819278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka            mb.params.distance_postfix_bits,
820278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka            storage_ix, storage);
82179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int num_distance_codes =
82279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      kNumDistanceShortCodes + mb.params.num_direct_distance_codes +
82379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      (48 << mb.params.distance_postfix_bits);
8241571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  for (int i = 0; i < mb.literal_split.num_types_; ++i) {
8251571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    WriteBits(2, mb.literal_context_modes[i], storage_ix, storage);
8261571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  }
827278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  EncodeContextMap(mb.literal_context_map, mb.literal_histograms.size(),
828278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                   storage_ix, storage);
829278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  EncodeContextMap(mb.distance_context_map, mb.distance_histograms.size(),
830278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                   storage_ix, storage);
83179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<EntropyCodeLiteral> literal_codes;
83279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<EntropyCodeCommand> command_codes;
83379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  std::vector<EntropyCodeDistance> distance_codes;
834278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  BuildAndStoreEntropyCodes(mb.literal_histograms, 256, &literal_codes,
835278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                            storage_ix, storage);
836278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  BuildAndStoreEntropyCodes(mb.command_histograms, kNumCommandPrefixes,
837278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                            &command_codes, storage_ix, storage);
838278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  BuildAndStoreEntropyCodes(mb.distance_histograms, num_distance_codes,
839278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                            &distance_codes, storage_ix, storage);
84079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplitIterator literal_it(mb.literal_split);
84179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplitIterator command_it(mb.command_split);
84279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  BlockSplitIterator distance_it(mb.distance_split);
84379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < mb.cmds.size(); ++i) {
84479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    const Command& cmd = mb.cmds[i];
84579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    MoveAndEncode(command_split_code, &command_it, storage_ix, storage);
84679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    EncodeCommand(cmd, command_codes[command_it.type_], storage_ix, storage);
84779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    for (int j = 0; j < cmd.insert_length_; ++j) {
84879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      MoveAndEncode(literal_split_code, &literal_it, storage_ix, storage);
84979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      int histogram_idx = literal_it.type_;
8501571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      uint8_t prev_byte = *pos > 0 ? ringbuffer[(*pos - 1) & mask] : 0;
8511571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      uint8_t prev_byte2 = *pos > 1 ? ringbuffer[(*pos - 2) & mask] : 0;
8521571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      int context = ((literal_it.type_ << kLiteralContextBits) +
8531571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                     Context(prev_byte, prev_byte2,
8541571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                             mb.literal_context_modes[literal_it.type_]));
8551571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      histogram_idx = mb.literal_context_map[context];
856278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      int literal = ringbuffer[*pos & mask];
857278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      WriteBits(literal_codes[histogram_idx].depth_[literal],
858278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                literal_codes[histogram_idx].bits_[literal],
859278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                storage_ix, storage);
8601571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      ++(*pos);
86179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
86279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (*pos < end_pos && cmd.distance_prefix_ != 0xffff) {
86379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      MoveAndEncode(distance_split_code, &distance_it, storage_ix, storage);
8641571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      int context = (distance_it.type_ << 2) +
865437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter          ((cmd.copy_length_code_ > 4) ? 3 : cmd.copy_length_code_ - 2);
866437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter      int histogram_index = mb.distance_context_map[context];
867437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter      size_t max_distance = std::min(*pos, (size_t)kMaxBackwardDistance);
86879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      EncodeCopyDistance(cmd, distance_codes[histogram_index],
86979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                         storage_ix, storage);
87079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
87179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    *pos += cmd.copy_length_;
87279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
87379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
87479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
875278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan SzabadkaBrotliCompressor::BrotliCompressor(BrotliParams params)
876278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    : params_(params),
877278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      window_bits_(kWindowBits),
878278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      hashers_(new Hashers()),
8791571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      dist_ringbuffer_idx_(0),
8801571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      input_pos_(0),
8811571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      ringbuffer_(kRingBufferBits, kMetaBlockSizeBits),
8821571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      literal_cost_(1 << kRingBufferBits),
8831571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      storage_ix_(0),
8841571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      storage_(new uint8_t[2 << kMetaBlockSizeBits]) {
885fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka  dist_ringbuffer_[0] = 16;
886fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka  dist_ringbuffer_[1] = 15;
887fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka  dist_ringbuffer_[2] = 11;
888fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka  dist_ringbuffer_[3] = 4;
889437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter  storage_[0] = 0;
890278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  switch (params.mode) {
891278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    case BrotliParams::MODE_TEXT: hash_type_ = Hashers::HASH_15_8_4; break;
892278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    case BrotliParams::MODE_FONT: hash_type_ = Hashers::HASH_15_8_2; break;
893278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    default: break;
894278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  }
895278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  hashers_->Init(hash_type_);
896278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  if (params.mode == BrotliParams::MODE_TEXT) {
897278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    StoreDictionaryWordHashes();
898278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  }
899437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter}
9001571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
9011571db36a9b00e895882ee236e9f84d62f8ea226Zoltan SzabadkaBrotliCompressor::~BrotliCompressor() {
9021571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  delete[] storage_;
9031571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka}
9041571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
905278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan SzabadkaStaticDictionary *BrotliCompressor::static_dictionary_ = NULL;
906278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka
9072733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadkavoid BrotliCompressor::StoreDictionaryWordHashes() {
908278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  const int num_transforms = kNumTransforms;
909278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  if (static_dictionary_ == NULL) {
910278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    static_dictionary_ = new StaticDictionary;
911278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    for (int t = num_transforms - 1; t >= 0; --t) {
9120829e37293abc2523a1d2b0f4d68ff7b5fcd8e01Zoltan Szabadka      for (int i = kMaxDictionaryWordLength;
9130829e37293abc2523a1d2b0f4d68ff7b5fcd8e01Zoltan Szabadka           i >= kMinDictionaryWordLength; --i) {
914278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        const int num_words = 1 << kBrotliDictionarySizeBitsByLength[i];
915278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        for (int j = num_words - 1; j >= 0; --j) {
916278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          int word_id = t * num_words + j;
917278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          std::string word = GetTransformedDictionaryWord(i, word_id);
9180829e37293abc2523a1d2b0f4d68ff7b5fcd8e01Zoltan Szabadka          if (word.size() >= 4) {
919278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka            static_dictionary_->Insert(word, i, word_id);
920278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka          }
9212733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadka        }
9222733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadka      }
9232733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadka    }
9242733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadka  }
925278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  hashers_->SetStaticDictionary(static_dictionary_);
9262733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadka}
9272733d6c0c2d6774e4c4d7a233ad200c5a832a133Zoltan Szabadka
9281571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadkavoid BrotliCompressor::WriteStreamHeader() {
9291571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  // Encode window size.
930437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter  if (window_bits_ == 16) {
931437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter    WriteBits(1, 0, &storage_ix_, storage_);
932437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter  } else {
933437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter    WriteBits(1, 1, &storage_ix_, storage_);
934437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter    WriteBits(3, window_bits_ - 17, &storage_ix_, storage_);
935437bbad37074e472b66d427814275de84ca77f19Roderick Sheeter  }
9361571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka}
9371571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
9381571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadkavoid BrotliCompressor::WriteMetaBlock(const size_t input_size,
9391571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                                      const uint8_t* input_buffer,
940e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka                                      const bool is_last,
9411571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                                      size_t* encoded_size,
9421571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                                      uint8_t* encoded_buffer) {
943cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  static const double kMinUTF8Ratio = 0.75;
944cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  bool utf8_mode = false;
9451571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  std::vector<Command> commands;
946e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (input_size > 0) {
947e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    ringbuffer_.Write(input_buffer, input_size);
948cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    utf8_mode = IsMostlyUTF8(
949cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      &ringbuffer_.start()[input_pos_ & kRingBufferMask],
950cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      input_size, kMinUTF8Ratio);
951cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    if (utf8_mode) {
952cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      EstimateBitCostsForLiteralsUTF8(input_pos_, input_size,
953278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                                      kRingBufferMask, kRingBufferMask,
954278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                                      ringbuffer_.start(), &literal_cost_[0]);
955cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka    } else {
956cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka      EstimateBitCostsForLiterals(input_pos_, input_size,
957278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                                  kRingBufferMask, kRingBufferMask,
958278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                                  ringbuffer_.start(), &literal_cost_[0]);
959278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    }
960278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    CreateBackwardReferences(
961278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        input_size, input_pos_,
962278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        ringbuffer_.start(),
963278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        &literal_cost_[0],
964278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        kRingBufferMask, kMaxBackwardDistance,
965278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        hashers_.get(),
966278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        hash_type_,
967278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka        &commands);
968278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka    ComputeDistanceShortCodes(&commands, input_pos_, kMaxBackwardDistance,
969278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                              dist_ringbuffer_,
970e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka                              &dist_ringbuffer_idx_);
971e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  }
9721571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  EncodingParams params;
973278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  params.num_direct_distance_codes =
974278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      params_.mode == BrotliParams::MODE_FONT ? 12 : 0;
975278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  params.distance_postfix_bits =
976278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka      params_.mode == BrotliParams::MODE_FONT ? 1 : 0;
9771571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  params.literal_context_mode = CONTEXT_SIGNED;
978e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  const int storage_ix0 = storage_ix_;
9791571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  MetaBlock mb;
9801571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  BuildMetaBlock(params, commands, ringbuffer_.start(), input_pos_,
9811571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                 kRingBufferMask, &mb);
982e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  StoreMetaBlock(mb, is_last, ringbuffer_.start(), kRingBufferMask,
9831571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                 &input_pos_, &storage_ix_, storage_);
984e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  size_t output_size = is_last ? ((storage_ix_ + 7) >> 3) : (storage_ix_ >> 3);
985278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  output_size -= (storage_ix0 >> 3);
986e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  if (input_size + 4 < output_size) {
987e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    storage_ix_ = storage_ix0;
988e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    storage_[storage_ix_ >> 3] &= (1 << (storage_ix_ & 7)) - 1;
989e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    EncodeMetaBlockLength(input_size, false, true, &storage_ix_, storage_);
990e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    size_t hdr_size = (storage_ix_ + 7) >> 3;
991e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    memcpy(encoded_buffer, storage_, hdr_size);
992e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    memcpy(encoded_buffer + hdr_size, input_buffer, input_size);
993e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    *encoded_size = hdr_size + input_size;
994e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    if (is_last) {
995e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      encoded_buffer[*encoded_size] = 0x3;  // ISLAST, ISEMPTY
996e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      ++(*encoded_size);
997e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    }
998e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    storage_ix_ = 0;
999e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    storage_[0] = 0;
1000e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  } else {
1001e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    memcpy(encoded_buffer, storage_, output_size);
1002e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    *encoded_size = output_size;
1003e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    if (is_last) {
1004e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      storage_ix_ = 0;
1005e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      storage_[0] = 0;
1006e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    } else {
1007e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      storage_ix_ -= output_size << 3;
1008e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      storage_[storage_ix_ >> 3] = storage_[output_size];
1009e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    }
1010e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  }
10111571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka}
10121571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
10131571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadkavoid BrotliCompressor::FinishStream(
10141571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    size_t* encoded_size, uint8_t* encoded_buffer) {
1015e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka  WriteMetaBlock(0, NULL, true, encoded_size, encoded_buffer);
10161571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka}
10171571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
10181571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka
1019278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadkaint BrotliCompressBuffer(BrotliParams params,
1020278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka                         size_t input_size,
102179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                         const uint8_t* input_buffer,
102279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                         size_t* encoded_size,
102379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                         uint8_t* encoded_buffer) {
102479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (input_size == 0) {
1025fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka    encoded_buffer[0] = 6;
1026fe79fac8da1ec850d94679705a6f3405153f51ddZoltan Szabadka    *encoded_size = 1;
102779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    return 1;
102879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
102979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
1030278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka  BrotliCompressor compressor(params);
10311571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  compressor.WriteStreamHeader();
103279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
10331571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  const int max_block_size = 1 << kMetaBlockSizeBits;
10341571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  size_t max_output_size = *encoded_size;
10351571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  const uint8_t* input_end = input_buffer + input_size;
10361571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  *encoded_size = 0;
103779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
10381571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  while (input_buffer < input_end) {
10391571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    int block_size = max_block_size;
1040e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    bool is_last = false;
10411571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    if (block_size >= input_end - input_buffer) {
10421571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      block_size = input_end - input_buffer;
1043e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka      is_last = true;
10441571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    }
10451571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    size_t output_size = max_output_size;
1046e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka    compressor.WriteMetaBlock(block_size, input_buffer, is_last,
10471571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka                              &output_size, &encoded_buffer[*encoded_size]);
10481571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    input_buffer += block_size;
10491571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    *encoded_size += output_size;
10501571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka    max_output_size -= output_size;
105179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
105279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
105379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  return 1;
105479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
105579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
105679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}  // namespace brotli
1057