179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Copyright 2010 Google Inc. All Rights Reserved.
279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Licensed under the Apache License, Version 2.0 (the "License");
479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// you may not use this file except in compliance with the License.
579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// You may obtain a copy of the License at
679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// http://www.apache.org/licenses/LICENSE-2.0
879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Unless required by applicable law or agreed to in writing, software
1079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// distributed under the License is distributed on an "AS IS" BASIS,
1179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
1279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// See the License for the specific language governing permissions and
1379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// limitations under the License.
1479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
1579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Entropy encoding (Huffman) utilities.
1679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
1779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#ifndef BROTLI_ENC_ENTROPY_ENCODE_H_
1879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#define BROTLI_ENC_ENTROPY_ENCODE_H_
1979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <stdint.h>
2179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <string.h>
2279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./histogram.h"
2379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./prefix.h"
2479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkanamespace brotli {
2679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
2779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// This function will create a Huffman tree.
2879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
2979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// The (data,length) contains the population counts.
3079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// The tree_limit is the maximum bit depth of the Huffman codes.
3179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
3279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// The depth contains the tree, i.e., how many bits are used for
3379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// the symbol.
3479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
3579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// See http://en.wikipedia.org/wiki/Huffman_coding
3679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid CreateHuffmanTree(const int *data,
3779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                       const int length,
3879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                       const int tree_limit,
3979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                       uint8_t *depth);
4079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
4179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Change the population counts in a way that the consequent
4279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Hufmann tree compression, especially its rle-part will be more
4379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// likely to compress this data more efficiently.
4479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka//
4579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// length contains the size of the histogram.
4679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// counts contains the population counts.
4779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkaint OptimizeHuffmanCountsForRle(int length, int* counts);
4879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
4979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
5079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Write a huffman tree from bit depths into the bitstream representation
5179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// of a Huffman tree. The generated Huffman tree is to be compressed once
5279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// more using a Huffman tree
5379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid WriteHuffmanTree(const uint8_t* depth, const int length,
5479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      uint8_t* tree,
5579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      uint8_t* extra_bits_data,
5679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      int* huffman_tree_size);
5779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
5879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Get the actual bit values for a tree of bit depths.
5979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid ConvertBitDepthsToSymbols(const uint8_t *depth, int len, uint16_t *bits);
6079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
6179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize>
6279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastruct EntropyCode {
6379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // How many bits for symbol.
6479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  uint8_t depth_[kSize];
6579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // Actual bits used to represent the symbol.
6679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  uint16_t bits_[kSize];
6779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  // How many non-zero depth.
6879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  int count_;
691571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  // First four symbols with non-zero depth.
701571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka  int symbols_[4];
7179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka};
7279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
7379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize>
7479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid BuildEntropyCode(const Histogram<kSize>& histogram,
7579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      const int tree_limit,
7679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      const int alphabet_size,
7779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      EntropyCode<kSize>* code) {
7879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  memset(code->depth_, 0, sizeof(code->depth_));
7979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  memset(code->bits_, 0, sizeof(code->bits_));
8079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  memset(code->symbols_, 0, sizeof(code->symbols_));
8179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  code->count_ = 0;
8279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  if (histogram.total_count_ == 0) return;
8379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  for (int i = 0; i < kSize; ++i) {
8479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    if (histogram.data_[i] > 0) {
851571db36a9b00e895882ee236e9f84d62f8ea226Zoltan Szabadka      if (code->count_ < 4) code->symbols_[code->count_] = i;
8679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka      ++code->count_;
8779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    }
8879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
89cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadka  if (alphabet_size >= 50 && code->count_ >= 16) {
9079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    int counts[kSize];
9179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    memcpy(counts, &histogram.data_[0], sizeof(counts[0]) * kSize);
9279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    OptimizeHuffmanCountsForRle(alphabet_size, counts);
9379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    CreateHuffmanTree(counts, alphabet_size, tree_limit, &code->depth_[0]);
9479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  } else {
9579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka    CreateHuffmanTree(&histogram.data_[0], alphabet_size, tree_limit,
9679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka                      &code->depth_[0]);
9779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  }
9879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka  ConvertBitDepthsToSymbols(&code->depth_[0], alphabet_size, &code->bits_[0]);
9979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}
10079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
101e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadkastatic const int kCodeLengthCodes = 18;
10279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
10379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Literal entropy code.
10479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatypedef EntropyCode<256> EntropyCodeLiteral;
10579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Prefix entropy codes.
10679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatypedef EntropyCode<kNumCommandPrefixes> EntropyCodeCommand;
10779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatypedef EntropyCode<kNumDistancePrefixes> EntropyCodeDistance;
10879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatypedef EntropyCode<kNumBlockLenPrefixes> EntropyCodeBlockLength;
109e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka// Context map entropy code, 256 Huffman tree indexes + 16 run length codes.
110e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadkatypedef EntropyCode<272> EntropyCodeContextMap;
111e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka// Block type entropy code, 256 block types + 2 special symbols.
112e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadkatypedef EntropyCode<258> EntropyCodeBlockType;
11379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
11479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}  // namespace brotli
11579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka
11679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#endif  // BROTLI_ENC_ENTROPY_ENCODE_H_
117