15025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov/* Copyright 2015 Google Inc. All Rights Reserved. 25025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 35025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov Distributed under MIT license. 45025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov See file LICENSE for detail or copy at https://opensource.org/licenses/MIT 55025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov*/ 65025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 75025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikovpackage org.brotli.dec; 85025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 95025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov/** 105025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * Utilities for building Huffman decoding tables. 115025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov */ 125025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikovfinal class Huffman { 135025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 145025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov /** 155025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * Maximum possible Huffman table size for an alphabet size of 704, max code length 15 and root 165025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * table bits 8. 175025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov */ 185025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov static final int HUFFMAN_MAX_TABLE_SIZE = 1080; 195025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 205025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov private static final int MAX_LENGTH = 15; 215025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 225025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov /** 235025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * Returns reverse(reverse(key, len) + 1, len). 245025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * 255025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * <p> reverse(key, len) is the bit-wise reversal of the len least significant bits of key. 265025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov */ 275025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov private static int getNextKey(int key, int len) { 285025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int step = 1 << (len - 1); 295025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov while ((key & step) != 0) { 305025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov step >>= 1; 315025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 325025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov return (key & (step - 1)) + step; 335025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 345025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 355025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov /** 365025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * Stores {@code item} in {@code table[0], table[step], table[2 * step] .., table[end]}. 375025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * 385025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * <p> Assumes that end is an integer multiple of step. 395025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov */ 405025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov private static void replicateValue(int[] table, int offset, int step, int end, int item) { 415025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov do { 425025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov end -= step; 435025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov table[offset + end] = item; 445025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } while (end > 0); 455025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 465025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 475025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov /** 485025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * @param count histogram of bit lengths for the remaining symbols, 495025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * @param len code length of the next processed symbol. 505025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * @return table width of the next 2nd level table. 515025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov */ 525025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov private static int nextTableBitSize(int[] count, int len, int rootBits) { 535025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int left = 1 << (len - rootBits); 545025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov while (len < MAX_LENGTH) { 555025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov left -= count[len]; 565025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov if (left <= 0) { 575025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov break; 585025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 595025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov len++; 605025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov left <<= 1; 615025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 625025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov return len - rootBits; 635025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 645025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 655025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov /** 665025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * Builds Huffman lookup table assuming code lengths are in symbol order. 675025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov */ 685025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov static void buildHuffmanTable(int[] rootTable, int tableOffset, int rootBits, int[] codeLengths, 695025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int codeLengthsSize) { 705025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int key; // Reversed prefix code. 715025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int[] sorted = new int[codeLengthsSize]; // Symbols sorted by code length. 725025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov // TODO: fill with zeroes? 735025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int[] count = new int[MAX_LENGTH + 1]; // Number of codes of each length. 745025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int[] offset = new int[MAX_LENGTH + 1]; // Offsets in sorted table for each length. 755025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int symbol; 765025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 775025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov // Build histogram of code lengths. 785025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (symbol = 0; symbol < codeLengthsSize; symbol++) { 795025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov count[codeLengths[symbol]]++; 805025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 815025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 825025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov // Generate offsets into sorted symbol table by code length. 835025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov offset[1] = 0; 845025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (int len = 1; len < MAX_LENGTH; len++) { 855025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov offset[len + 1] = offset[len] + count[len]; 865025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 875025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 885025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov // Sort symbols by length, by symbol order within each length. 895025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (symbol = 0; symbol < codeLengthsSize; symbol++) { 905025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov if (codeLengths[symbol] != 0) { 915025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov sorted[offset[codeLengths[symbol]]++] = symbol; 925025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 935025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 945025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 955025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int tableBits = rootBits; 965025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int tableSize = 1 << tableBits; 975025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int totalSize = tableSize; 985025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 995025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov // Special case code with only one value. 1005025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov if (offset[MAX_LENGTH] == 1) { 1015025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (key = 0; key < totalSize; key++) { 1025025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov rootTable[tableOffset + key] = sorted[0]; 1035025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1045025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov return; 1055025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1065025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 1075025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov // Fill in root table. 1085025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov key = 0; 1095025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov symbol = 0; 1105025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (int len = 1, step = 2; len <= rootBits; len++, step <<= 1) { 1115025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (; count[len] > 0; count[len]--) { 1125025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov replicateValue(rootTable, tableOffset + key, step, tableSize, len << 16 | sorted[symbol++]); 1135025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov key = getNextKey(key, len); 1145025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1155025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1165025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov 1175025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov // Fill in 2nd level tables and add pointers to root table. 1185025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int mask = totalSize - 1; 1195025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int low = -1; 1205025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov int currentOffset = tableOffset; 1215025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (int len = rootBits + 1, step = 2; len <= MAX_LENGTH; len++, step <<= 1) { 1225025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov for (; count[len] > 0; count[len]--) { 1235025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov if ((key & mask) != low) { 1245025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov currentOffset += tableSize; 1255025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov tableBits = nextTableBitSize(count, len, rootBits); 1265025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov tableSize = 1 << tableBits; 1275025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov totalSize += tableSize; 1285025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov low = key & mask; 1295025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov rootTable[tableOffset + low] = 1305025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov (tableBits + rootBits) << 16 | (currentOffset - tableOffset - low); 1315025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1325025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov replicateValue(rootTable, currentOffset + (key >> rootBits), step, tableSize, 1335025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov (len - rootBits) << 16 | sorted[symbol++]); 1345025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov key = getNextKey(key, len); 1355025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1365025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1375025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov } 1385025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov} 139