179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Copyright 2013 Google Inc. All Rights Reserved. 279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Licensed under the Apache License, Version 2.0 (the "License"); 479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// you may not use this file except in compliance with the License. 579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// You may obtain a copy of the License at 679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// http://www.apache.org/licenses/LICENSE-2.0 879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Unless required by applicable law or agreed to in writing, software 1079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// distributed under the License is distributed on an "AS IS" BASIS, 1179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// See the License for the specific language governing permissions and 1379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// limitations under the License. 1479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 1579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Block split point selection utilities. 1679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 1779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./block_splitter.h" 1879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 1979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <math.h> 2079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <stdio.h> 2179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <stdlib.h> 2279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <string.h> 2379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 2479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <algorithm> 2579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <map> 2679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 2779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./cluster.h" 2879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./command.h" 2979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./fast_log.h" 3079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./histogram.h" 3179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 3279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkanamespace brotli { 3379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 34cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkastatic const int kMaxLiteralHistograms = 100; 3579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const int kMaxCommandHistograms = 50; 3679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const double kLiteralBlockSwitchCost = 26; 3779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const double kCommandBlockSwitchCost = 13.5; 3879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const double kDistanceBlockSwitchCost = 14.6; 3979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const int kLiteralStrideLength = 70; 4079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const int kCommandStrideLength = 40; 41cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkastatic const int kSymbolsPerLiteralHistogram = 544; 4279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const int kSymbolsPerCommandHistogram = 530; 43cbd5cb55f487eda746b0d6f8b5742b5a8e5c846aZoltan Szabadkastatic const int kSymbolsPerDistanceHistogram = 544; 4479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const int kMinLengthForBlockSplitting = 128; 4579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const int kIterMulForRefining = 2; 4679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastatic const int kMinItersForRefining = 100; 4779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 4879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid CopyLiteralsToByteArray(const std::vector<Command>& cmds, 4979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const uint8_t* data, 5079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<uint8_t>* literals) { 5179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Count how many we have. 5279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_t total_length = 0; 5379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < cmds.size(); ++i) { 5479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka total_length += cmds[i].insert_length_; 5579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 5679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (total_length == 0) { 5779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return; 5879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 5979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 6079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Allocate. 6179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka literals->resize(total_length); 6279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 6379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Loop again, and copy this time. 6479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_t pos = 0; 6579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_t from_pos = 0; 6679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < cmds.size() && pos < total_length; ++i) { 6779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka memcpy(&(*literals)[pos], data + from_pos, cmds[i].insert_length_); 6879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka pos += cmds[i].insert_length_; 6979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka from_pos += cmds[i].insert_length_ + cmds[i].copy_length_; 7079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 7179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 7279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 7379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid CopyCommandsToByteArray(const std::vector<Command>& cmds, 7479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<uint16_t>* insert_and_copy_codes, 7579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<uint8_t>* distance_prefixes) { 7679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < cmds.size(); ++i) { 7779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const Command& cmd = cmds[i]; 7879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka insert_and_copy_codes->push_back(cmd.command_prefix_); 7979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (cmd.copy_length_ > 0 && cmd.distance_prefix_ != 0xffff) { 8079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka distance_prefixes->push_back(cmd.distance_prefix_); 8179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 8279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 8379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 8479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 8579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<typename HistogramType, typename DataType> 8679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid InitialEntropyCodes(const DataType* data, size_t length, 8779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int literals_per_histogram, 8879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int max_histograms, 8979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_t stride, 9079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramType>* vec) { 9179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int total_histograms = length / literals_per_histogram + 1; 9279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (total_histograms > max_histograms) { 9379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka total_histograms = max_histograms; 9479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 9579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka unsigned int seed = 7; 9679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int block_length = length / total_histograms; 9779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < total_histograms; ++i) { 9879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int pos = length * i / total_histograms; 9979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (i != 0) { 10079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka pos += rand_r(&seed) % block_length; 10179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 10279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (pos + stride >= length) { 10379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka pos = length - stride - 1; 10479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 10579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramType histo; 10679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka histo.Add(data + pos, stride); 10779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka vec->push_back(histo); 10879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 10979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 11079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 11179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<typename HistogramType, typename DataType> 11279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid RandomSample(unsigned int* seed, 11379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const DataType* data, 11479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_t length, 11579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_t stride, 11679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramType* sample) { 11740955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka size_t pos = 0; 11840955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka if (stride >= length) { 11940955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka pos = 0; 12040955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka stride = length; 12140955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka } else { 12240955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka pos = rand_r(seed) % (length - stride + 1); 12340955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka } 12479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka sample->Add(data + pos, stride); 12579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 12679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 12779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<typename HistogramType, typename DataType> 12879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid RefineEntropyCodes(const DataType* data, size_t length, 12979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_t stride, 13079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramType>* vec) { 13140955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka int iters = 13279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kIterMulForRefining * length / stride + kMinItersForRefining; 13379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka unsigned int seed = 7; 13440955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka iters = ((iters + vec->size() - 1) / vec->size()) * vec->size(); 13579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int iter = 0; iter < iters; ++iter) { 13679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramType sample; 13779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka RandomSample(&seed, data, length, stride, &sample); 13840955ce409e55573646af2b0d0ece2e2404f2e7aZoltan Szabadka int ix = iter % vec->size(); 13979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka (*vec)[ix].AddHistogram(sample); 14079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 14179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 14279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 14379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkainline static float BitCost(int total, int count) { 14479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return count == 0 ? FastLog2(total) + 2 : FastLog2(total) - FastLog2(count); 14579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 14679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 14779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<typename DataType, int kSize> 14879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid FindBlocks(const DataType* data, const size_t length, 14979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const double block_switch_bitcost, 15079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const std::vector<Histogram<kSize> > &vec, 15179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka uint8_t *block_id) { 15279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (vec.size() <= 1) { 15379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < length; ++i) { 15479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_id[i] = 0; 15579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 15679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return; 15779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 15879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int vecsize = vec.size(); 15979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double* insert_cost = new double[kSize * vecsize]; 16079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka memset(insert_cost, 0, sizeof(insert_cost[0]) * kSize * vecsize); 16179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < kSize; ++i) { 16279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int j = 0; j < vecsize; ++j) { 16379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka insert_cost[i * vecsize + j] = 16479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka BitCost(vec[j].total_count_, vec[j].data_[i]); 16579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 16679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 16779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double *cost = new double[vecsize]; 16879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka memset(cost, 0, sizeof(cost[0]) * vecsize); 16979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka bool* switch_signal = new bool[length * vecsize]; 17079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka memset(switch_signal, 0, sizeof(switch_signal[0]) * length * vecsize); 17179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // After each iteration of this loop, cost[k] will contain the difference 17279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // between the minimum cost of arriving at the current byte position using 17379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // entropy code k, and the minimum cost of arriving at the current byte 17479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // position. This difference is capped at the block switch cost, and if it 17579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // reaches block switch cost, it means that when we trace back from the last 17679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // position, we need to switch here. 17779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (size_t byte_ix = 0; byte_ix < length; ++byte_ix) { 17879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int ix = byte_ix * vecsize; 17979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int insert_cost_ix = data[byte_ix] * vecsize; 18079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double min_cost = 1e99; 18179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int k = 0; k < vecsize; ++k) { 18279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // We are coding the symbol in data[byte_ix] with entropy code k. 18379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cost[k] += insert_cost[insert_cost_ix + k]; 18479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (cost[k] < min_cost) { 18579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka min_cost = cost[k]; 18679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_id[byte_ix] = k; 18779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 18879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 18979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double block_switch_cost = block_switch_bitcost; 19079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // More blocks for the beginning. 19179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (byte_ix < 2000) { 19279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_switch_cost *= 0.77 + 0.07 * byte_ix / 2000; 19379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 19479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int k = 0; k < vecsize; ++k) { 19579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cost[k] -= min_cost; 19679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (cost[k] >= block_switch_cost) { 19779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cost[k] = block_switch_cost; 19879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka switch_signal[ix + k] = true; 19979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 20079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 20179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 20279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Now trace back from the last position and switch at the marked places. 20379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int byte_ix = length - 1; 20479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int ix = byte_ix * vecsize; 20579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int cur_id = block_id[byte_ix]; 20679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka while (byte_ix > 0) { 20779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka --byte_ix; 20879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ix -= vecsize; 20979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (switch_signal[ix + cur_id]) { 21079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cur_id = block_id[byte_ix]; 21179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 21279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_id[byte_ix] = cur_id; 21379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 21479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka delete[] insert_cost; 21579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka delete[] cost; 21679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka delete[] switch_signal; 21779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 21879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 21979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkaint RemapBlockIds(uint8_t* block_ids, const size_t length) { 22079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::map<uint8_t, uint8_t> new_id; 22179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int next_id = 0; 22279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < length; ++i) { 22379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (new_id.find(block_ids[i]) == new_id.end()) { 22479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka new_id[block_ids[i]] = next_id; 22579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ++next_id; 22679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 22779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 22879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < length; ++i) { 22979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_ids[i] = new_id[block_ids[i]]; 23079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 23179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return next_id; 23279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 23379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 23479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<typename HistogramType, typename DataType> 23579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid BuildBlockHistograms(const DataType* data, const size_t length, 23679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka uint8_t* block_ids, 23779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramType>* histograms) { 23879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int num_types = RemapBlockIds(block_ids, length); 23979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka histograms->clear(); 24079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka histograms->resize(num_types); 24179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < length; ++i) { 24279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka (*histograms)[block_ids[i]].Add(data[i]); 24379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 24479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 24579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 24679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<typename HistogramType, typename DataType> 24779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid ClusterBlocks(const DataType* data, const size_t length, 24879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka uint8_t* block_ids) { 24979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramType> histograms; 25079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<int> block_index(length); 25179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int cur_idx = 0; 25279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramType cur_histogram; 25379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < length; ++i) { 25479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka bool block_boundary = (i + 1 == length || block_ids[i] != block_ids[i + 1]); 25579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_index[i] = cur_idx; 25679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cur_histogram.Add(data[i]); 25779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (block_boundary) { 25879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka histograms.push_back(cur_histogram); 25979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cur_histogram.Clear(); 26079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ++cur_idx; 26179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 26279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 26379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramType> clustered_histograms; 26479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<int> histogram_symbols; 265e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka // Block ids need to fit in one byte. 266e70949119a5540e62ef3b9aae797f797a8d8b44bZoltan Szabadka static const int kMaxNumberOfBlockTypes = 256; 26779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ClusterHistograms(histograms, 1, histograms.size(), 26879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kMaxNumberOfBlockTypes, 26979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &clustered_histograms, 27079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &histogram_symbols); 27179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < length; ++i) { 27279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_ids[i] = histogram_symbols[block_index[i]]; 27379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 27479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 27579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 27679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid BuildBlockSplit(const std::vector<uint8_t>& block_ids, BlockSplit* split) { 27779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int cur_id = block_ids[0]; 27879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int cur_length = 1; 27979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->num_types_ = -1; 28079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 1; i < block_ids.size(); ++i) { 28179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (block_ids[i] != cur_id) { 28279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->types_.push_back(cur_id); 28379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->lengths_.push_back(cur_length); 28479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->num_types_ = std::max(split->num_types_, cur_id); 28579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cur_id = block_ids[i]; 28679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cur_length = 0; 28779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 28879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ++cur_length; 28979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 29079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->types_.push_back(cur_id); 29179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->lengths_.push_back(cur_length); 29279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->num_types_ = std::max(split->num_types_, cur_id); 29379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ++split->num_types_; 29479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 29579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 29679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<typename HistogramType, typename DataType> 29779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid SplitByteVector(const std::vector<DataType>& data, 29879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const int literals_per_histogram, 29979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const int max_histograms, 30079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const int sampling_stride_length, 30179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const double block_switch_cost, 30279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka BlockSplit* split) { 30379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (data.empty()) { 30479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->num_types_ = 0; 30579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return; 30679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } else if (data.size() < kMinLengthForBlockSplitting) { 30779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->num_types_ = 1; 30879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->types_.push_back(0); 30979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka split->lengths_.push_back(data.size()); 31079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return; 31179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 31279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramType> histograms; 31379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Find good entropy codes. 31479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka InitialEntropyCodes(data.data(), data.size(), 31579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka literals_per_histogram, 31679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka max_histograms, 31779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka sampling_stride_length, 31879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &histograms); 31979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka RefineEntropyCodes(data.data(), data.size(), 32079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka sampling_stride_length, 32179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &histograms); 32279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Find a good path through literals with the good entropy codes. 32379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<uint8_t> block_ids(data.size()); 32479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < 10; ++i) { 32579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka FindBlocks(data.data(), data.size(), 32679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka block_switch_cost, 32779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka histograms, 32879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &block_ids[0]); 32979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka BuildBlockHistograms(data.data(), data.size(), &block_ids[0], &histograms); 33079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 33179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ClusterBlocks<HistogramType>(data.data(), data.size(), &block_ids[0]); 33279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka BuildBlockSplit(block_ids, split); 33379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 33479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 33579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid SplitBlock(const std::vector<Command>& cmds, 33679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const uint8_t* data, 33779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka BlockSplit* literal_split, 33879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka BlockSplit* insert_and_copy_split, 33979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka BlockSplit* dist_split) { 34079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Create a continuous array of literals. 34179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<uint8_t> literals; 34279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka CopyLiteralsToByteArray(cmds, data, &literals); 34379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 34479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Compute prefix codes for commands. 34579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<uint16_t> insert_and_copy_codes; 34679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<uint8_t> distance_prefixes; 34779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka CopyCommandsToByteArray(cmds, 34879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &insert_and_copy_codes, 34979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &distance_prefixes); 35079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 351278b89484fa947fad7fbf8753aadff0c9ce18a8cZoltan Szabadka 35279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka SplitByteVector<HistogramLiteral>( 35379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka literals, 35479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kSymbolsPerLiteralHistogram, kMaxLiteralHistograms, 35579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kLiteralStrideLength, kLiteralBlockSwitchCost, 35679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka literal_split); 35779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka SplitByteVector<HistogramCommand>( 35879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka insert_and_copy_codes, 35979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kSymbolsPerCommandHistogram, kMaxCommandHistograms, 36079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kCommandStrideLength, kCommandBlockSwitchCost, 36179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka insert_and_copy_split); 36279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka SplitByteVector<HistogramDistance>( 36379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka distance_prefixes, 36479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kSymbolsPerDistanceHistogram, kMaxCommandHistograms, 36579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka kCommandStrideLength, kDistanceBlockSwitchCost, 36679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka dist_split); 36779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 36879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 36979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid SplitBlockByTotalLength(const std::vector<Command>& all_commands, 37079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int input_size, 37179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int target_length, 37279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<std::vector<Command> >* blocks) { 37379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int num_blocks = input_size / target_length + 1; 37479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int length_limit = input_size / num_blocks + 1; 37579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int total_length = 0; 37679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<Command> cur_block; 37779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < all_commands.size(); ++i) { 37879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const Command& cmd = all_commands[i]; 37979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int cmd_length = cmd.insert_length_ + cmd.copy_length_; 38079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (total_length > length_limit) { 38179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka blocks->push_back(cur_block); 38279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cur_block.clear(); 38379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka total_length = 0; 38479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 38579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cur_block.push_back(cmd); 38679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka total_length += cmd_length; 38779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 38879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka blocks->push_back(cur_block); 38979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 39079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 39179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} // namespace brotli 392