1b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov/* NOLINT(build/header_guard) */ 2b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov/* Copyright 2013 Google Inc. All Rights Reserved. 3b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 4b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov Distributed under MIT license. 5b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 6b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov*/ 7b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 8b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov/* template parameters: FN, CODE */ 9b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 10b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov#define HistogramType FN(Histogram) 11b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 12b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov/* Computes the bit cost reduction by combining out[idx1] and out[idx2] and if 13b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov it is below a threshold, stores the pair (idx1, idx2) in the *pairs queue. */ 14b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene KliuchnikovBROTLI_INTERNAL void FN(BrotliCompareAndPushToQueue)( 15b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov const HistogramType* out, const uint32_t* cluster_size, uint32_t idx1, 16b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t idx2, size_t max_num_pairs, HistogramPair* pairs, 17b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t* num_pairs) CODE({ 182048189048f130b0e8fb60307277379743dc5a2dEugene Kliuchnikov BROTLI_BOOL is_good_pair = BROTLI_FALSE; 19b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramPair p; 20b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (idx1 == idx2) { 21b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov return; 22b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 23b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (idx2 < idx1) { 24b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t t = idx2; 25b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov idx2 = idx1; 26b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov idx1 = t; 27b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 28b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.idx1 = idx1; 29b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.idx2 = idx2; 30b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.cost_diff = 0.5 * ClusterCostDiff(cluster_size[idx1], cluster_size[idx2]); 31b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.cost_diff -= out[idx1].bit_cost_; 32b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.cost_diff -= out[idx2].bit_cost_; 33b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 34b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (out[idx1].total_count_ == 0) { 35b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.cost_combo = out[idx2].bit_cost_; 362048189048f130b0e8fb60307277379743dc5a2dEugene Kliuchnikov is_good_pair = BROTLI_TRUE; 37b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } else if (out[idx2].total_count_ == 0) { 38b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.cost_combo = out[idx1].bit_cost_; 392048189048f130b0e8fb60307277379743dc5a2dEugene Kliuchnikov is_good_pair = BROTLI_TRUE; 40b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } else { 41b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov double threshold = *num_pairs == 0 ? 1e99 : 42b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_MAX(double, 0.0, pairs[0].cost_diff); 43b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramType combo = out[idx1]; 44b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov double cost_combo; 45b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(HistogramAddHistogram)(&combo, &out[idx2]); 46b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov cost_combo = FN(BrotliPopulationCost)(&combo); 47b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (cost_combo < threshold - p.cost_diff) { 48b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.cost_combo = cost_combo; 492048189048f130b0e8fb60307277379743dc5a2dEugene Kliuchnikov is_good_pair = BROTLI_TRUE; 50b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 51b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 52b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (is_good_pair) { 53b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p.cost_diff += p.cost_combo; 54b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (*num_pairs > 0 && HistogramPairIsLess(&pairs[0], &p)) { 55b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Replace the top of the queue if needed. */ 56b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (*num_pairs < max_num_pairs) { 57b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov pairs[*num_pairs] = pairs[0]; 58b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov ++(*num_pairs); 59b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 60b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov pairs[0] = p; 61b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } else if (*num_pairs < max_num_pairs) { 62b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov pairs[*num_pairs] = p; 63b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov ++(*num_pairs); 64b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 65b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 66b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov}) 67b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 68b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene KliuchnikovBROTLI_INTERNAL size_t FN(BrotliHistogramCombine)(HistogramType* out, 69b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t* cluster_size, 70b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t* symbols, 71b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t* clusters, 72b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramPair* pairs, 73b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t num_clusters, 74b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t symbols_size, 75b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t max_clusters, 76b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t max_num_pairs) CODE({ 77b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov double cost_diff_threshold = 0.0; 78b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t min_cluster_size = 1; 79b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t num_pairs = 0; 80b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 81b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov { 82b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* We maintain a vector of histogram pairs, with the property that the pair 83b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov with the maximum bit cost reduction is the first. */ 84b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t idx1; 85b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (idx1 = 0; idx1 < num_clusters; ++idx1) { 86b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t idx2; 87b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (idx2 = idx1 + 1; idx2 < num_clusters; ++idx2) { 88b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(BrotliCompareAndPushToQueue)(out, cluster_size, clusters[idx1], 89b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov clusters[idx2], max_num_pairs, &pairs[0], &num_pairs); 90b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 91b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 92b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 93b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 94b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov while (num_clusters > min_cluster_size) { 95b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t best_idx1; 96b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t best_idx2; 97b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t i; 98b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (pairs[0].cost_diff >= cost_diff_threshold) { 99b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov cost_diff_threshold = 1e99; 100b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov min_cluster_size = max_clusters; 101b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov continue; 102b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 103b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Take the best pair from the top of heap. */ 104b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov best_idx1 = pairs[0].idx1; 105b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov best_idx2 = pairs[0].idx2; 106b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(HistogramAddHistogram)(&out[best_idx1], &out[best_idx2]); 107b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov out[best_idx1].bit_cost_ = pairs[0].cost_combo; 108b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov cluster_size[best_idx1] += cluster_size[best_idx2]; 109b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < symbols_size; ++i) { 110b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (symbols[i] == best_idx2) { 111b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov symbols[i] = best_idx1; 112b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 113b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 114b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < num_clusters; ++i) { 115b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (clusters[i] == best_idx2) { 116b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov memmove(&clusters[i], &clusters[i + 1], 117b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov (num_clusters - i - 1) * sizeof(clusters[0])); 118b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov break; 119b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 120b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 121b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov --num_clusters; 122b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov { 123b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Remove pairs intersecting the just combined best pair. */ 124b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t copy_to_idx = 0; 125b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < num_pairs; ++i) { 126b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramPair* p = &pairs[i]; 127b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (p->idx1 == best_idx1 || p->idx2 == best_idx1 || 128b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov p->idx1 == best_idx2 || p->idx2 == best_idx2) { 129b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Remove invalid pair from the queue. */ 130b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov continue; 131b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 132b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (HistogramPairIsLess(&pairs[0], p)) { 133b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Replace the top of the queue if needed. */ 134b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramPair front = pairs[0]; 135b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov pairs[0] = *p; 136b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov pairs[copy_to_idx] = front; 137b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } else { 138b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov pairs[copy_to_idx] = *p; 139b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 140b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov ++copy_to_idx; 141b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 142b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov num_pairs = copy_to_idx; 143b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 144b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 145b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Push new pairs formed with the combined histogram to the heap. */ 146b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < num_clusters; ++i) { 147b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(BrotliCompareAndPushToQueue)(out, cluster_size, best_idx1, clusters[i], 148b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov max_num_pairs, &pairs[0], &num_pairs); 149b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 150b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 151b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov return num_clusters; 152b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov}) 153b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 154b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov/* What is the bit cost of moving histogram from cur_symbol to candidate. */ 155b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene KliuchnikovBROTLI_INTERNAL double FN(BrotliHistogramBitCostDistance)( 156b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov const HistogramType* histogram, const HistogramType* candidate) CODE({ 157b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (histogram->total_count_ == 0) { 158b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov return 0.0; 159b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } else { 160b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramType tmp = *histogram; 161b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(HistogramAddHistogram)(&tmp, candidate); 162b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov return FN(BrotliPopulationCost)(&tmp) - candidate->bit_cost_; 163b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 164b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov}) 165b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 166b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov/* Find the best 'out' histogram for each of the 'in' histograms. 167b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov When called, clusters[0..num_clusters) contains the unique values from 168b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov symbols[0..in_size), but this property is not preserved in this function. 169b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov Note: we assume that out[]->bit_cost_ is already up-to-date. */ 170b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene KliuchnikovBROTLI_INTERNAL void FN(BrotliHistogramRemap)(const HistogramType* in, 171b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t in_size, const uint32_t* clusters, size_t num_clusters, 172b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramType* out, uint32_t* symbols) CODE({ 173b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t i; 174b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < in_size; ++i) { 175b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t best_out = i == 0 ? symbols[0] : symbols[i - 1]; 176b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov double best_bits = 177b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(BrotliHistogramBitCostDistance)(&in[i], &out[best_out]); 178b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t j; 179b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (j = 0; j < num_clusters; ++j) { 180b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov const double cur_bits = 181b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(BrotliHistogramBitCostDistance)(&in[i], &out[clusters[j]]); 182b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (cur_bits < best_bits) { 183b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov best_bits = cur_bits; 184b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov best_out = clusters[j]; 185b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 186b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 187b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov symbols[i] = best_out; 188b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 189b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 190b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Recompute each out based on raw and symbols. */ 191b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < num_clusters; ++i) { 192b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(HistogramClear)(&out[clusters[i]]); 193b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 194b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < in_size; ++i) { 195b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(HistogramAddHistogram)(&out[symbols[i]], &in[i]); 196b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 197b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov}) 198b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 199b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov/* Reorders elements of the out[0..length) array and changes values in 200b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov symbols[0..length) array in the following way: 201b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov * when called, symbols[] contains indexes into out[], and has N unique 202b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov values (possibly N < length) 203b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov * on return, symbols'[i] = f(symbols[i]) and 204b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov out'[symbols'[i]] = out[symbols[i]], for each 0 <= i < length, 205b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov where f is a bijection between the range of symbols[] and [0..N), and 206b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov the first occurrences of values in symbols'[i] come in consecutive 207b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov increasing order. 208b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov Returns N, the number of unique values in symbols[]. */ 209b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene KliuchnikovBROTLI_INTERNAL size_t FN(BrotliHistogramReindex)(MemoryManager* m, 210b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramType* out, uint32_t* symbols, size_t length) CODE({ 211b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov static const uint32_t kInvalidIndex = BROTLI_UINT32_MAX; 212b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t* new_index = BROTLI_ALLOC(m, uint32_t, length); 213b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t next_index; 214b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramType* tmp; 215b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t i; 216b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (BROTLI_IS_OOM(m)) return 0; 217b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < length; ++i) { 218b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov new_index[i] = kInvalidIndex; 219b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 220b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov next_index = 0; 221b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < length; ++i) { 222b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (new_index[symbols[i]] == kInvalidIndex) { 223b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov new_index[symbols[i]] = next_index; 224b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov ++next_index; 225b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 226b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 227b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* TODO: by using idea of "cycle-sort" we can avoid allocation of 228b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov tmp and reduce the number of copying by the factor of 2. */ 229b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov tmp = BROTLI_ALLOC(m, HistogramType, next_index); 230b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (BROTLI_IS_OOM(m)) return 0; 231b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov next_index = 0; 232b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < length; ++i) { 233b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (new_index[symbols[i]] == next_index) { 234b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov tmp[next_index] = out[symbols[i]]; 235b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov ++next_index; 236b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 237b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov symbols[i] = new_index[symbols[i]]; 238b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 239b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_FREE(m, new_index); 240b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < next_index; ++i) { 241b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov out[i] = tmp[i]; 242b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 243b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_FREE(m, tmp); 244b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov return next_index; 245b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov}) 246b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 247b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene KliuchnikovBROTLI_INTERNAL void FN(BrotliClusterHistograms)( 248b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov MemoryManager* m, const HistogramType* in, const size_t in_size, 249b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t max_histograms, HistogramType* out, size_t* out_size, 250b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t* histogram_symbols) CODE({ 251b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t* cluster_size = BROTLI_ALLOC(m, uint32_t, in_size); 252b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov uint32_t* clusters = BROTLI_ALLOC(m, uint32_t, in_size); 253b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t num_clusters = 0; 254b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov const size_t max_input_histograms = 64; 255b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t pairs_capacity = max_input_histograms * max_input_histograms / 2; 256b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* For the first pass of clustering, we allow all pairs. */ 257b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov HistogramPair* pairs = BROTLI_ALLOC(m, HistogramPair, pairs_capacity + 1); 258b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t i; 259b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 260b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (BROTLI_IS_OOM(m)) return; 261b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 262b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < in_size; ++i) { 263b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov cluster_size[i] = 1; 264b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 265b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 266b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < in_size; ++i) { 267b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov out[i] = in[i]; 268b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov out[i].bit_cost_ = FN(BrotliPopulationCost)(&in[i]); 269b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov histogram_symbols[i] = (uint32_t)i; 270b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 271b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 272b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (i = 0; i < in_size; i += max_input_histograms) { 273b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t num_to_combine = 274b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_MIN(size_t, in_size - i, max_input_histograms); 275b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t num_new_clusters; 276b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t j; 277b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov for (j = 0; j < num_to_combine; ++j) { 278b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov clusters[num_clusters + j] = (uint32_t)(i + j); 279b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 280b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov num_new_clusters = 281b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(BrotliHistogramCombine)(out, cluster_size, 282b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov &histogram_symbols[i], 283b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov &clusters[num_clusters], pairs, 284b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov num_to_combine, num_to_combine, 285b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov max_histograms, pairs_capacity); 286b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov num_clusters += num_new_clusters; 287b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 288b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 289b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov { 290b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* For the second pass, we limit the total number of histogram pairs. 291b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov After this limit is reached, we only keep searching for the best pair. */ 292b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov size_t max_num_pairs = BROTLI_MIN(size_t, 293b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 64 * num_clusters, (num_clusters / 2) * num_clusters); 294b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_ENSURE_CAPACITY( 295b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov m, HistogramPair, pairs, pairs_capacity, max_num_pairs + 1); 296b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (BROTLI_IS_OOM(m)) return; 297b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 298b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Collapse similar histograms. */ 299b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov num_clusters = FN(BrotliHistogramCombine)(out, cluster_size, 300b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov histogram_symbols, clusters, 301b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov pairs, num_clusters, in_size, 302b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov max_histograms, max_num_pairs); 303b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov } 304b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_FREE(m, pairs); 305b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_FREE(m, cluster_size); 306b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Find the optimal map from original histograms to the final ones. */ 307b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov FN(BrotliHistogramRemap)(in, in_size, clusters, num_clusters, 308b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov out, histogram_symbols); 309b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov BROTLI_FREE(m, clusters); 310b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov /* Convert the context map to a canonical form. */ 311b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov *out_size = FN(BrotliHistogramReindex)(m, out, histogram_symbols, in_size); 312b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov if (BROTLI_IS_OOM(m)) return; 313b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov}) 314b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov 315b972c67780f03256a3fbf81dc3350a4bf00aa4adEugene Kliuchnikov#undef HistogramType 316