15025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov/* Copyright 2015 Google Inc. All Rights Reserved.
25025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
35025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   Distributed under MIT license.
45025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   See file LICENSE for detail or copy at https://opensource.org/licenses/MIT
55025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov*/
65025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
75025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikovpackage org.brotli.dec;
85025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
95025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov/**
105025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov * Utilities for building Huffman decoding tables.
115025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov */
125025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikovfinal class Huffman {
135025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
145025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  /**
155025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * Maximum possible Huffman table size for an alphabet size of 704, max code length 15 and root
165025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * table bits 8.
175025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   */
185025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  static final int HUFFMAN_MAX_TABLE_SIZE = 1080;
195025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
205025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  private static final int MAX_LENGTH = 15;
215025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
225025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  /**
235025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * Returns reverse(reverse(key, len) + 1, len).
245025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   *
255025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * <p> reverse(key, len) is the bit-wise reversal of the len least significant bits of key.
265025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   */
275025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  private static int getNextKey(int key, int len) {
285025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int step = 1 << (len - 1);
295025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    while ((key & step) != 0) {
305025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      step >>= 1;
315025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
325025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    return (key & (step - 1)) + step;
335025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  }
345025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
355025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  /**
365025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * Stores {@code item} in {@code table[0], table[step], table[2 * step] .., table[end]}.
375025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   *
385025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * <p> Assumes that end is an integer multiple of step.
395025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   */
405025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  private static void replicateValue(int[] table, int offset, int step, int end, int item) {
415025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    do {
425025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      end -= step;
435025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      table[offset + end] = item;
445025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    } while (end > 0);
455025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  }
465025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
475025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  /**
485025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * @param count histogram of bit lengths for the remaining symbols,
495025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * @param len code length of the next processed symbol.
505025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * @return table width of the next 2nd level table.
515025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   */
525025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  private static int nextTableBitSize(int[] count, int len, int rootBits) {
535025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int left = 1 << (len - rootBits);
545025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    while (len < MAX_LENGTH) {
555025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      left -= count[len];
565025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      if (left <= 0) {
575025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        break;
585025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      }
595025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      len++;
605025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      left <<= 1;
615025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
625025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    return len - rootBits;
635025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  }
645025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
655025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  /**
665025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   * Builds Huffman lookup table assuming code lengths are in symbol order.
675025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov   */
685025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  static void buildHuffmanTable(int[] rootTable, int tableOffset, int rootBits, int[] codeLengths,
695025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      int codeLengthsSize) {
705025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int key; // Reversed prefix code.
715025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int[] sorted = new int[codeLengthsSize]; // Symbols sorted by code length.
725025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    // TODO: fill with zeroes?
735025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int[] count = new int[MAX_LENGTH + 1]; // Number of codes of each length.
745025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int[] offset = new int[MAX_LENGTH + 1]; // Offsets in sorted table for each length.
755025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int symbol;
765025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
775025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    // Build histogram of code lengths.
785025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    for (symbol = 0; symbol < codeLengthsSize; symbol++) {
795025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      count[codeLengths[symbol]]++;
805025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
815025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
825025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    // Generate offsets into sorted symbol table by code length.
835025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    offset[1] = 0;
845025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    for (int len = 1; len < MAX_LENGTH; len++) {
855025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      offset[len + 1] = offset[len] + count[len];
865025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
875025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
885025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    // Sort symbols by length, by symbol order within each length.
895025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    for (symbol = 0; symbol < codeLengthsSize; symbol++) {
905025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      if (codeLengths[symbol] != 0) {
915025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        sorted[offset[codeLengths[symbol]]++] = symbol;
925025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      }
935025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
945025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
955025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int tableBits = rootBits;
965025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int tableSize = 1 << tableBits;
975025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int totalSize = tableSize;
985025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
995025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    // Special case code with only one value.
1005025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    if (offset[MAX_LENGTH] == 1) {
1015025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      for (key = 0; key < totalSize; key++) {
1025025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        rootTable[tableOffset + key] = sorted[0];
1035025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      }
1045025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      return;
1055025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
1065025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
1075025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    // Fill in root table.
1085025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    key = 0;
1095025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    symbol = 0;
1105025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    for (int len = 1, step = 2; len <= rootBits; len++, step <<= 1) {
1115025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      for (; count[len] > 0; count[len]--) {
1125025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        replicateValue(rootTable, tableOffset + key, step, tableSize, len << 16 | sorted[symbol++]);
1135025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        key = getNextKey(key, len);
1145025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      }
1155025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
1165025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov
1175025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    // Fill in 2nd level tables and add pointers to root table.
1185025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int mask = totalSize - 1;
1195025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int low = -1;
1205025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    int currentOffset = tableOffset;
1215025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    for (int len = rootBits + 1, step = 2; len <= MAX_LENGTH; len++, step <<= 1) {
1225025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      for (; count[len] > 0; count[len]--) {
1235025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        if ((key & mask) != low) {
1245025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov          currentOffset += tableSize;
1255025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov          tableBits = nextTableBitSize(count, len, rootBits);
1265025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov          tableSize = 1 << tableBits;
1275025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov          totalSize += tableSize;
1285025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov          low = key & mask;
1295025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov          rootTable[tableOffset + low] =
1305025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov              (tableBits + rootBits) << 16 | (currentOffset - tableOffset - low);
1315025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        }
1325025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        replicateValue(rootTable, currentOffset + (key >> rootBits), step, tableSize,
1335025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov            (len - rootBits) << 16 | sorted[symbol++]);
1345025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov        key = getNextKey(key, len);
1355025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov      }
1365025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov    }
1375025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov  }
1385025365d0fe5a5ca874574b952ef53cfa98ca5c1Eugene Kliuchnikov}
139