179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Copyright 2013 Google Inc. All Rights Reserved. 279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Licensed under the Apache License, Version 2.0 (the "License"); 479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// you may not use this file except in compliance with the License. 579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// You may obtain a copy of the License at 679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// http://www.apache.org/licenses/LICENSE-2.0 879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Unless required by applicable law or agreed to in writing, software 1079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// distributed under the License is distributed on an "AS IS" BASIS, 1179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 1279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// See the License for the specific language governing permissions and 1379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// limitations under the License. 1479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// 1579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Functions for clustering similar histograms together. 1679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 1779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#ifndef BROTLI_ENC_CLUSTER_H_ 1879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#define BROTLI_ENC_CLUSTER_H_ 1979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 2079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <math.h> 2179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <stdint.h> 2279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <stdio.h> 2379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <complex> 2479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <map> 2579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <set> 2679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <utility> 2779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include <vector> 2879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 2979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./bit_cost.h" 3079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./entropy_encode.h" 3179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./fast_log.h" 3279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#include "./histogram.h" 3379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 3479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkanamespace brotli { 3579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 3679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastruct HistogramPair { 3779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int idx1; 3879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int idx2; 3979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka bool valid; 4079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double cost_combo; 4179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double cost_diff; 4279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}; 4379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 4479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkastruct HistogramPairComparator { 4579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka bool operator()(const HistogramPair& p1, const HistogramPair& p2) { 4679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (p1.cost_diff != p2.cost_diff) { 4779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return p1.cost_diff > p2.cost_diff; 4879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 4979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return abs(p1.idx1 - p1.idx2) > abs(p2.idx1 - p2.idx2); 5079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 5179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka}; 5279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 5379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Returns entropy reduction of the context map when we combine two clusters. 5479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkainline double ClusterCostDiff(int size_a, int size_b) { 5579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int size_c = size_a + size_b; 5679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return size_a * FastLog2(size_a) + size_b * FastLog2(size_b) - 5779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka size_c * FastLog2(size_c); 5879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 5979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 6079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Computes the bit cost reduction by combining out[idx1] and out[idx2] and if 6179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// it is below a threshold, stores the pair (idx1, idx2) in the *pairs heap. 6279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize> 6379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid CompareAndPushToHeap(const Histogram<kSize>* out, 6479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const int* cluster_size, 6579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int idx1, int idx2, 6679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramPair>* pairs) { 6779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (idx1 == idx2) { 6879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return; 6979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 7079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (idx2 < idx1) { 7179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int t = idx2; 7279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka idx2 = idx1; 7379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka idx1 = t; 7479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 7579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka bool store_pair = false; 7679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramPair p; 7779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.idx1 = idx1; 7879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.idx2 = idx2; 7979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.valid = true; 8079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]); 8179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.cost_diff -= out[idx1].bit_cost_; 8279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.cost_diff -= out[idx2].bit_cost_; 8379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 8479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (out[idx1].total_count_ == 0) { 8579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.cost_combo = out[idx2].bit_cost_; 8679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka store_pair = true; 8779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } else if (out[idx2].total_count_ == 0) { 8879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.cost_combo = out[idx1].bit_cost_; 8979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka store_pair = true; 9079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } else { 9179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double threshold = pairs->empty() ? 1e99 : 9279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::max(0.0, (*pairs)[0].cost_diff); 9379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka Histogram<kSize> combo = out[idx1]; 9479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka combo.AddHistogram(out[idx2]); 9579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double cost_combo = PopulationCost(combo); 9679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (cost_combo < threshold - p.cost_diff) { 9779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.cost_combo = cost_combo; 9879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka store_pair = true; 9979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 10079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 10179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (store_pair) { 10279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.cost_diff += p.cost_combo; 10379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka pairs->push_back(p); 10479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka push_heap(pairs->begin(), pairs->end(), HistogramPairComparator()); 10579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 10679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 10779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 10879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize> 10979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid HistogramCombine(Histogram<kSize>* out, 11079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int* cluster_size, 11179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int* symbols, 11279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int symbols_size, 11379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int max_clusters) { 11479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double cost_diff_threshold = 0.0; 11579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int min_cluster_size = 1; 11679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::set<int> all_symbols; 11779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<int> clusters; 11879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < symbols_size; ++i) { 11979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (all_symbols.find(symbols[i]) == all_symbols.end()) { 12079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka all_symbols.insert(symbols[i]); 12179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka clusters.push_back(symbols[i]); 12279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 12379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 12479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 12579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // We maintain a heap of histogram pairs, ordered by the bit cost reduction. 12679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<HistogramPair> pairs; 12779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int idx1 = 0; idx1 < clusters.size(); ++idx1) { 12879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int idx2 = idx1 + 1; idx2 < clusters.size(); ++idx2) { 12979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka CompareAndPushToHeap(out, cluster_size, clusters[idx1], clusters[idx2], 13079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &pairs); 13179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 13279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 13379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 13479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka while (clusters.size() > min_cluster_size) { 13579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (pairs[0].cost_diff >= cost_diff_threshold) { 13679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cost_diff_threshold = 1e99; 13779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka min_cluster_size = max_clusters; 13879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka continue; 13979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 14079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Take the best pair from the top of heap. 14179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int best_idx1 = pairs[0].idx1; 14279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int best_idx2 = pairs[0].idx2; 14379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka out[best_idx1].AddHistogram(out[best_idx2]); 14479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka out[best_idx1].bit_cost_ = pairs[0].cost_combo; 14579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka cluster_size[best_idx1] += cluster_size[best_idx2]; 14679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < symbols_size; ++i) { 14779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (symbols[i] == best_idx2) { 14879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka symbols[i] = best_idx1; 14979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 15079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 15179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i + 1 < clusters.size(); ++i) { 15279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (clusters[i] >= best_idx2) { 15379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka clusters[i] = clusters[i + 1]; 15479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 15579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 15679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka clusters.pop_back(); 15779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Invalidate pairs intersecting the just combined best pair. 15879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < pairs.size(); ++i) { 15979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramPair& p = pairs[i]; 16079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (p.idx1 == best_idx1 || p.idx2 == best_idx1 || 16179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.idx1 == best_idx2 || p.idx2 == best_idx2) { 16279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka p.valid = false; 16379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 16479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 16579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Pop invalid pairs from the top of the heap. 16679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka while (!pairs.empty() && !pairs[0].valid) { 16779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka pop_heap(pairs.begin(), pairs.end(), HistogramPairComparator()); 16879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka pairs.pop_back(); 16979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 17079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Push new pairs formed with the combined histogram to the heap. 17179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < clusters.size(); ++i) { 17279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka CompareAndPushToHeap(out, cluster_size, best_idx1, clusters[i], &pairs); 17379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 17479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 17579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 17679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 17779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// ----------------------------------------------------------------------------- 17879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Histogram refinement 17979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 18079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// What is the bit cost of moving histogram from cur_symbol to candidate. 18179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize> 18279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkadouble HistogramBitCostDistance(const Histogram<kSize>& histogram, 18379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const Histogram<kSize>& candidate) { 18479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (histogram.total_count_ == 0) { 18579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return 0.0; 18679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 18779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka Histogram<kSize> tmp = histogram; 18879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka tmp.AddHistogram(candidate); 18979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka return PopulationCost(tmp) - candidate.bit_cost_; 19079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 19179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 19279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Find the best 'out' histogram for each of the 'in' histograms. 19379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Note: we assume that out[]->bit_cost_ is already up-to-date. 19479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize> 19579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid HistogramRemap(const Histogram<kSize>* in, int in_size, 19679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka Histogram<kSize>* out, int* symbols) { 19779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::set<int> all_symbols; 19879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < in_size; ++i) { 19979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka all_symbols.insert(symbols[i]); 20079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 20179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < in_size; ++i) { 20279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int best_out = i == 0 ? symbols[0] : symbols[i - 1]; 20379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka double best_bits = HistogramBitCostDistance(in[i], out[best_out]); 20479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (std::set<int>::const_iterator k = all_symbols.begin(); 20579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka k != all_symbols.end(); ++k) { 20679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const double cur_bits = HistogramBitCostDistance(in[i], out[*k]); 20779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (cur_bits < best_bits) { 20879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka best_bits = cur_bits; 20979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka best_out = *k; 21079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 21179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 21279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka symbols[i] = best_out; 21379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 21479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 21579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Recompute each out based on raw and symbols. 21679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (std::set<int>::const_iterator k = all_symbols.begin(); 21779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka k != all_symbols.end(); ++k) { 21879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka out[*k].Clear(); 21979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 22079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < in_size; ++i) { 22179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka out[symbols[i]].AddHistogram(in[i]); 22279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 22379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 22479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 22579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Reorder histograms in *out so that the new symbols in *symbols come in 22679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// increasing order. 22779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize> 22879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid HistogramReindex(std::vector<Histogram<kSize> >* out, 22979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<int>* symbols) { 23079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<Histogram<kSize> > tmp(*out); 23179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::map<int, int> new_index; 23279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int next_index = 0; 23379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < symbols->size(); ++i) { 23479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (new_index.find((*symbols)[i]) == new_index.end()) { 23579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka new_index[(*symbols)[i]] = next_index; 23679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka (*out)[next_index] = tmp[(*symbols)[i]]; 23779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka ++next_index; 23879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 23979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 24079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka out->resize(next_index); 24179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < symbols->size(); ++i) { 24279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka (*symbols)[i] = new_index[(*symbols)[i]]; 24379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 24479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 24579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 24679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// Clusters similar histograms in 'in' together, the selected histograms are 24779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// placed in 'out', and for each index in 'in', *histogram_symbols will 24879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka// indicate which of the 'out' histograms is the best approximation. 24979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkatemplate<int kSize> 25079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadkavoid ClusterHistograms(const std::vector<Histogram<kSize> >& in, 25179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int num_contexts, int num_blocks, 25279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka int max_histograms, 25379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<Histogram<kSize> >* out, 25479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<int>* histogram_symbols) { 25579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka const int in_size = num_contexts * num_blocks; 25679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka std::vector<int> cluster_size(in_size, 1); 25779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka out->resize(in_size); 25879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka histogram_symbols->resize(in_size); 25979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < in_size; ++i) { 26079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka (*out)[i] = in[i]; 26179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka (*out)[i].bit_cost_ = PopulationCost(in[i]); 26279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka (*histogram_symbols)[i] = i; 26379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 26479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 26579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Collapse similar histograms within a block type. 26679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka if (num_contexts > 1) { 26779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka for (int i = 0; i < num_blocks; ++i) { 26879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramCombine(&(*out)[0], &cluster_size[0], 26979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &(*histogram_symbols)[i * num_contexts], num_contexts, 27079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka max_histograms); 27179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 27279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka } 27379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 27479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Collapse similar histograms. 27579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramCombine(&(*out)[0], &cluster_size[0], 27679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka &(*histogram_symbols)[0], in_size, 27779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka max_histograms); 27879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 27979e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Find the optimal map from original histograms to the final ones. 28079e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramRemap(&in[0], in_size, &(*out)[0], &(*histogram_symbols)[0]); 28179e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 28279e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka // Convert the context map to a canonical form. 28379e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka HistogramReindex(out, histogram_symbols); 28479e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} 28579e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 28679e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka} // namespace brotli 28779e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka 28879e99afe468407e9ff9f0820df7190cb069eabebZoltan Szabadka#endif // BROTLI_ENC_CLUSTER_H_ 289