1381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* enough.c -- determine the maximum size of inflate's Huffman code tables over
2381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes * all possible valid and complete Huffman codes, subject to a length limit.
304351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes * Copyright (C) 2007, 2008, 2012 Mark Adler
404351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes * Version 1.4  18 August 2012  Mark Adler
5381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes */
6381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
7381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Version history:
8381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   1.0   3 Jan 2007  First version (derived from codecount.c version 1.4)
9381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   1.1   4 Jan 2007  Use faster incremental table usage computation
10381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                     Prune examine() search on previously visited states
11381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   1.2   5 Jan 2007  Comments clean up
12381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                     As inflate does, decrease root for short codes
13381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                     Refuse cases where inflate would increase root
14381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   1.3  17 Feb 2008  Add argument for initial root table size
15381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                     Fix bug for initial root table size == max - 1
16381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                     Use a macro to compute the history index
1704351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes   1.4  18 Aug 2012  Avoid shifts more than bits in type (caused endless loop!)
1804351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes                     Clean up comparisons of different types
1904351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes                     Clean up code indentation
20381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes */
21381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
22381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/*
23381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Examine all possible Huffman codes for a given number of symbols and a
24381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   maximum code length in bits to determine the maximum table size for zilb's
25381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   inflate.  Only complete Huffman codes are counted.
26381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
27381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Two codes are considered distinct if the vectors of the number of codes per
28381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   length are not identical.  So permutations of the symbol assignments result
29381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   in the same code for the counting, as do permutations of the assignments of
30381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the bit values to the codes (i.e. only canonical codes are counted).
31381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
32381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   We build a code from shorter to longer lengths, determining how many symbols
33381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   are coded at each length.  At each step, we have how many symbols remain to
34381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   be coded, what the last code length used was, and how many bit patterns of
35381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   that length remain unused. Then we add one to the code length and double the
36381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   number of unused patterns to graduate to the next code length.  We then
37381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   assign all portions of the remaining symbols to that code length that
38381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   preserve the properties of a correct and eventually complete code.  Those
39381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   properties are: we cannot use more bit patterns than are available; and when
40381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   all the symbols are used, there are exactly zero possible bit patterns
41381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   remaining.
42381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
43381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   The inflate Huffman decoding algorithm uses two-level lookup tables for
44381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   speed.  There is a single first-level table to decode codes up to root bits
45381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   in length (root == 9 in the current inflate implementation).  The table
46381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   has 1 << root entries and is indexed by the next root bits of input.  Codes
47381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   shorter than root bits have replicated table entries, so that the correct
48381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   entry is pointed to regardless of the bits that follow the short code.  If
49381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the code is longer than root bits, then the table entry points to a second-
50381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   level table.  The size of that table is determined by the longest code with
51381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   that root-bit prefix.  If that longest code has length len, then the table
52381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   has size 1 << (len - root), to index the remaining bits in that set of
53381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   codes.  Each subsequent root-bit prefix then has its own sub-table.  The
54381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   total number of table entries required by the code is calculated
55381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   incrementally as the number of codes at each bit length is populated.  When
56381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   all of the codes are shorter than root bits, then root is reduced to the
57381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   longest code length, resulting in a single, smaller, one-level table.
58381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
59381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   The inflate algorithm also provides for small values of root (relative to
60381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the log2 of the number of symbols), where the shortest code has more bits
61381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   than root.  In that case, root is increased to the length of the shortest
62381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   code.  This program, by design, does not handle that case, so it is verified
63381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   that the number of symbols is less than 2^(root + 1).
64381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
65381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   In order to speed up the examination (by about ten orders of magnitude for
66381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the default arguments), the intermediate states in the build-up of a code
67381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   are remembered and previously visited branches are pruned.  The memory
68381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   required for this will increase rapidly with the total number of symbols and
69381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the maximum code length in bits.  However this is a very small price to pay
70381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   for the vast speedup.
71381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
72381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   First, all of the possible Huffman codes are counted, and reachable
73381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   intermediate states are noted by a non-zero count in a saved-results array.
74381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Second, the intermediate states that lead to (root + 1) bit or longer codes
75381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   are used to look at all sub-codes from those junctures for their inflate
76381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   memory usage.  (The amount of memory used is not affected by the number of
77381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   codes of root bits or less in length.)  Third, the visited states in the
78381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   construction of those sub-codes and the associated calculation of the table
79381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   size is recalled in order to avoid recalculating from the same juncture.
80381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Beginning the code examination at (root + 1) bit codes, which is enabled by
81381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   identifying the reachable nodes, accounts for about six of the orders of
82381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   magnitude of improvement for the default arguments.  About another four
83381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   orders of magnitude come from not revisiting previous states.  Out of
84381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   approximately 2x10^16 possible Huffman codes, only about 2x10^6 sub-codes
85381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   need to be examined to cover all of the possible table memory usage cases
86381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   for the default arguments of 286 symbols limited to 15-bit codes.
87381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
88381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Note that an unsigned long long type is used for counting.  It is quite easy
89381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   to exceed the capacity of an eight-byte integer with a large number of
90381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   symbols and a large maximum code length, so multiple-precision arithmetic
91381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   would need to replace the unsigned long long arithmetic in that case.  This
92381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   program will abort if an overflow occurs.  The big_t type identifies where
93381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the counting takes place.
94381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
95381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   An unsigned long long type is also used for calculating the number of
96381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   possible codes remaining at the maximum length.  This limits the maximum
97381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   code length to the number of bits in a long long minus the number of bits
98381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   needed to represent the symbols in a flat code.  The code_t type identifies
99381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   where the bit pattern counting takes place.
100381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes */
101381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
102381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes#include <stdio.h>
103381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes#include <stdlib.h>
104381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes#include <string.h>
105381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes#include <assert.h>
106381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
107381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes#define local static
108381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
109381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* special data types */
110381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughestypedef unsigned long long big_t;   /* type for code counting */
111381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughestypedef unsigned long long code_t;  /* type for bit pattern counting */
112381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughesstruct tab {                        /* type for been here check */
113381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    size_t len;         /* length of bit vector in char's */
114381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    char *vec;          /* allocated bit vector */
115381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes};
116381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
117381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* The array for saving results, num[], is indexed with this triplet:
118381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
119381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes      syms: number of symbols remaining to code
120381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes      left: number of available bit patterns at length len
121381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes      len: number of bits in the codes currently being assigned
122381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
123381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Those indices are constrained thusly when saving results:
124381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
125381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes      syms: 3..totsym (totsym == total symbols to code)
126381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes      left: 2..syms - 1, but only the evens (so syms == 8 -> 2, 4, 6)
127381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes      len: 1..max - 1 (max == maximum code length in bits)
128381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
129381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   syms == 2 is not saved since that immediately leads to a single code.  left
130381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   must be even, since it represents the number of available bit patterns at
131381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the current length, which is double the number at the previous length.
132381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   left ends at syms-1 since left == syms immediately results in a single code.
133381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   (left > sym is not allowed since that would result in an incomplete code.)
134381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   len is less than max, since the code completes immediately when len == max.
135381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
136381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   The offset into the array is calculated for the three indices with the
137381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   first one (syms) being outermost, and the last one (len) being innermost.
138381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   We build the array with length max-1 lists for the len index, with syms-3
139381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   of those for each symbol.  There are totsym-2 of those, with each one
140381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   varying in length as a function of sym.  See the calculation of index in
141381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   count() for the index, and the calculation of size in main() for the size
142381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   of the array.
143381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
144381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   For the deflate example of 286 symbols limited to 15-bit codes, the array
145381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   has 284,284 entries, taking up 2.17 MB for an 8-byte big_t.  More than
146381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   half of the space allocated for saved results is actually used -- not all
147381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   possible triplets are reached in the generation of valid Huffman codes.
148381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes */
149381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
150381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* The array for tracking visited states, done[], is itself indexed identically
151381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   to the num[] array as described above for the (syms, left, len) triplet.
152381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Each element in the array is further indexed by the (mem, rem) doublet,
153381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   where mem is the amount of inflate table space used so far, and rem is the
154381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   remaining unused entries in the current inflate sub-table.  Each indexed
155381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   element is simply one bit indicating whether the state has been visited or
156381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   not.  Since the ranges for mem and rem are not known a priori, each bit
157381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   vector is of a variable size, and grows as needed to accommodate the visited
158381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   states.  mem and rem are used to calculate a single index in a triangular
159381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   array.  Since the range of mem is expected in the default case to be about
160381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   ten times larger than the range of rem, the array is skewed to reduce the
161381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   memory usage, with eight times the range for mem than for rem.  See the
162381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   calculations for offset and bit in beenhere() for the details.
163381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
164381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   For the deflate example of 286 symbols limited to 15-bit codes, the bit
165381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   vectors grow to total approximately 21 MB, in addition to the 4.3 MB done[]
166381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   array itself.
167381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes */
168381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
169381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Globals to avoid propagating constants or constant pointers recursively */
170381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal int max;          /* maximum allowed bit length for the codes */
171381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal int root;         /* size of base code table in bits */
172381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal int large;        /* largest code table so far */
173381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal size_t size;      /* number of elements in num and done */
174381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal int *code;        /* number of symbols assigned to each bit length */
175381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal big_t *num;       /* saved results array for code counting */
176381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal struct tab *done; /* states already evaluated array */
177381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
178381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Index function for num[] and done[] */
179381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes#define INDEX(i,j,k) (((size_t)((i-1)>>1)*((i-2)>>1)+(j>>1)-1)*(max-1)+k-1)
180381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
181381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Free allocated space.  Uses globals code, num, and done. */
182381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal void cleanup(void)
183381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes{
184381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    size_t n;
185381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
186381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (done != NULL) {
187381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        for (n = 0; n < size; n++)
188381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            if (done[n].len)
189381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                free(done[n].vec);
190381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        free(done);
191381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
192381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (num != NULL)
193381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        free(num);
194381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (code != NULL)
195381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        free(code);
196381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes}
197381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
198381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Return the number of possible Huffman codes using bit patterns of lengths
199381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   len through max inclusive, coding syms symbols, with left bit patterns of
200381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   length len unused -- return -1 if there is an overflow in the counting.
201381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Keep a record of previous results in num to prevent repeating the same
202381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   calculation.  Uses the globals max and num. */
203381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal big_t count(int syms, int len, int left)
204381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes{
205381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    big_t sum;          /* number of possible codes from this juncture */
206381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    big_t got;          /* value returned from count() */
207381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int least;          /* least number of syms to use at this juncture */
208381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int most;           /* most number of syms to use at this juncture */
209381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int use;            /* number of bit patterns to use in next call */
210381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    size_t index;       /* index of this case in *num */
211381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
212381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* see if only one possible code */
213381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (syms == left)
214381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return 1;
215381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
216381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* note and verify the expected state */
217381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    assert(syms > left && left > 0 && len < max);
218381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
219381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* see if we've done this one already */
220381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    index = INDEX(syms, left, len);
221381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    got = num[index];
222381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (got)
223381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return got;         /* we have -- return the saved result */
224381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
225381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* we need to use at least this many bit patterns so that the code won't be
226381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes       incomplete at the next length (more bit patterns than symbols) */
227381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    least = (left << 1) - syms;
228381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (least < 0)
229381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        least = 0;
230381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
231381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* we can use at most this many bit patterns, lest there not be enough
232381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes       available for the remaining symbols at the maximum length (if there were
233381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes       no limit to the code length, this would become: most = left - 1) */
234381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    most = (((code_t)left << (max - len)) - syms) /
235381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            (((code_t)1 << (max - len)) - 1);
236381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
237381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* count all possible codes from this juncture and add them up */
238381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    sum = 0;
239381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    for (use = least; use <= most; use++) {
240381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        got = count(syms - use, len + 1, (left - use) << 1);
241381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        sum += got;
24204351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes        if (got == (big_t)0 - 1 || sum < got)   /* overflow */
24304351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes            return (big_t)0 - 1;
244381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
245381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
246381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* verify that all recursive calls are productive */
247381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    assert(sum != 0);
248381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
249381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* save the result and return it */
250381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    num[index] = sum;
251381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    return sum;
252381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes}
253381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
254381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Return true if we've been here before, set to true if not.  Set a bit in a
255381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   bit vector to indicate visiting this state.  Each (syms,len,left) state
256381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   has a variable size bit vector indexed by (mem,rem).  The bit vector is
257381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   lengthened if needed to allow setting the (mem,rem) bit. */
258381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal int beenhere(int syms, int len, int left, int mem, int rem)
259381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes{
260381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    size_t index;       /* index for this state's bit vector */
261381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    size_t offset;      /* offset in this state's bit vector */
262381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int bit;            /* mask for this state's bit */
263381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    size_t length;      /* length of the bit vector in bytes */
264381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    char *vector;       /* new or enlarged bit vector */
265381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
266381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* point to vector for (syms,left,len), bit in vector for (mem,rem) */
267381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    index = INDEX(syms, left, len);
268381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    mem -= 1 << root;
269381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    offset = (mem >> 3) + rem;
270381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    offset = ((offset * (offset + 1)) >> 1) + rem;
271381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    bit = 1 << (mem & 7);
272381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
273381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* see if we've been here */
274381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    length = done[index].len;
275381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (offset < length && (done[index].vec[offset] & bit) != 0)
276381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return 1;       /* done this! */
277381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
278381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* we haven't been here before -- set the bit to show we have now */
279381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
280381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* see if we need to lengthen the vector in order to set the bit */
281381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (length <= offset) {
282381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* if we have one already, enlarge it, zero out the appended space */
283381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        if (length) {
284381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            do {
285381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                length <<= 1;
286381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            } while (length <= offset);
287381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            vector = realloc(done[index].vec, length);
288381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            if (vector != NULL)
289381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                memset(vector + done[index].len, 0, length - done[index].len);
290381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
291381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
292381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* otherwise we need to make a new vector and zero it out */
293381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        else {
294381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            length = 1 << (len - root);
295381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            while (length <= offset)
296381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                length <<= 1;
297381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            vector = calloc(length, sizeof(char));
298381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
299381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
300381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* in either case, bail if we can't get the memory */
301381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        if (vector == NULL) {
302381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            fputs("abort: unable to allocate enough memory\n", stderr);
303381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            cleanup();
304381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            exit(1);
305381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
306381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
307381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* install the new vector */
308381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        done[index].len = length;
309381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        done[index].vec = vector;
310381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
311381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
312381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* set the bit */
313381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    done[index].vec[offset] |= bit;
314381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    return 0;
315381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes}
316381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
317381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Examine all possible codes from the given node (syms, len, left).  Compute
318381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the amount of memory required to build inflate's decoding tables, where the
319381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   number of code structures used so far is mem, and the number remaining in
320381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the current sub-table is rem.  Uses the globals max, code, root, large, and
321381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   done. */
322381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal void examine(int syms, int len, int left, int mem, int rem)
323381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes{
324381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int least;          /* least number of syms to use at this juncture */
325381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int most;           /* most number of syms to use at this juncture */
326381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int use;            /* number of bit patterns to use in next call */
327381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
328381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* see if we have a complete code */
329381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (syms == left) {
330381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* set the last code entry */
331381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        code[len] = left;
332381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
333381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* complete computation of memory used by this code */
334381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        while (rem < left) {
335381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            left -= rem;
336381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            rem = 1 << (len - root);
337381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            mem += rem;
338381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
339381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        assert(rem == left);
340381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
341381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* if this is a new maximum, show the entries used and the sub-code */
342381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        if (mem > large) {
343381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            large = mem;
344381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            printf("max %d: ", mem);
345381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            for (use = root + 1; use <= max; use++)
346381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                if (code[use])
347381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                    printf("%d[%d] ", code[use], use);
348381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            putchar('\n');
349381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            fflush(stdout);
350381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
351381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
352381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        /* remove entries as we drop back down in the recursion */
353381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        code[len] = 0;
354381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return;
355381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
356381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
357381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* prune the tree if we can */
358381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (beenhere(syms, len, left, mem, rem))
359381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return;
360381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
361381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* we need to use at least this many bit patterns so that the code won't be
362381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes       incomplete at the next length (more bit patterns than symbols) */
363381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    least = (left << 1) - syms;
364381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (least < 0)
365381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        least = 0;
366381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
367381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* we can use at most this many bit patterns, lest there not be enough
368381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes       available for the remaining symbols at the maximum length (if there were
369381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes       no limit to the code length, this would become: most = left - 1) */
370381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    most = (((code_t)left << (max - len)) - syms) /
371381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            (((code_t)1 << (max - len)) - 1);
372381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
373381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* occupy least table spaces, creating new sub-tables as needed */
374381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    use = least;
375381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    while (rem < use) {
376381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        use -= rem;
377381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        rem = 1 << (len - root);
378381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        mem += rem;
379381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
380381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    rem -= use;
381381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
382381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* examine codes from here, updating table space as we go */
383381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    for (use = least; use <= most; use++) {
384381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        code[len] = use;
385381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        examine(syms - use, len + 1, (left - use) << 1,
386381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                mem + (rem ? 1 << (len - root) : 0), rem << 1);
387381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        if (rem == 0) {
388381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            rem = 1 << (len - root);
389381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            mem += rem;
390381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
391381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        rem--;
392381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
393381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
394381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* remove entries as we drop back down in the recursion */
395381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    code[len] = 0;
396381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes}
397381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
398381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/* Look at all sub-codes starting with root + 1 bits.  Look at only the valid
399381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   intermediate code states (syms, left, len).  For each completed code,
400381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   calculate the amount of memory required by inflate to build the decoding
401381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   tables. Find the maximum amount of memory required and show the code that
402381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   requires that maximum.  Uses the globals max, root, and num. */
403381716e9396b55b1adb8235b020c37344f60ab07Elliott Hugheslocal void enough(int syms)
404381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes{
405381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int n;              /* number of remaing symbols for this node */
406381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int left;           /* number of unused bit patterns at this length */
407381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    size_t index;       /* index of this case in *num */
408381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
409381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* clear code */
410381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    for (n = 0; n <= max; n++)
411381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        code[n] = 0;
412381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
413381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* look at all (root + 1) bit and longer codes */
414381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    large = 1 << root;              /* base table */
415381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (root < max)                 /* otherwise, there's only a base table */
416381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        for (n = 3; n <= syms; n++)
417381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            for (left = 2; left < n; left += 2)
418381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            {
419381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                /* look at all reachable (root + 1) bit nodes, and the
420381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                   resulting codes (complete at root + 2 or more) */
421381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                index = INDEX(n, left, root + 1);
422381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                if (root + 1 < max && num[index])       /* reachable node */
423381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                    examine(n, root + 1, left, 1 << root, 0);
424381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
425381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                /* also look at root bit codes with completions at root + 1
426381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                   bits (not saved in num, since complete), just in case */
427381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                if (num[index - 1] && n <= left << 1)
428381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                    examine((n - left) << 1, root + 1, (n - left) << 1,
429381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                            1 << root, 0);
430381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            }
431381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
432381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* done */
433381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    printf("done: maximum of %d table entries\n", large);
434381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes}
435381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
436381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes/*
437381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   Examine and show the total number of possible Huffman codes for a given
438381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   maximum number of symbols, initial root table size, and maximum code length
439381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   in bits -- those are the command arguments in that order.  The default
440381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   values are 286, 9, and 15 respectively, for the deflate literal/length code.
441381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   The possible codes are counted for each number of coded symbols from two to
442381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   the maximum.  The counts for each of those and the total number of codes are
443381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   shown.  The maximum number of inflate table entires is then calculated
444381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   across all possible codes.  Each new maximum number of table entries and the
445381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   associated sub-code (starting at root + 1 == 10 bits) is shown.
446381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
447381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   To count and examine Huffman codes that are not length-limited, provide a
448381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   maximum length equal to the number of symbols minus one.
449381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
450381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   For the deflate literal/length code, use "enough".  For the deflate distance
451381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   code, use "enough 30 6".
452381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
453381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   This uses the %llu printf format to print big_t numbers, which assumes that
454381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   big_t is an unsigned long long.  If the big_t type is changed (for example
455381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   to a multiple precision type), the method of printing will also need to be
456381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes   updated.
457381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes */
458381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughesint main(int argc, char **argv)
459381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes{
460381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int syms;           /* total number of symbols to code */
461381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    int n;              /* number of symbols to code for this run */
462381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    big_t got;          /* return value of count() */
463381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    big_t sum;          /* accumulated number of codes over n */
46404351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes    code_t word;        /* for counting bits in code_t */
465381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
466381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* set up globals for cleanup() */
467381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    code = NULL;
468381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    num = NULL;
469381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    done = NULL;
470381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
471381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* get arguments -- default to the deflate literal/length code */
472381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    syms = 286;
47304351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes    root = 9;
474381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    max = 15;
475381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (argc > 1) {
476381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        syms = atoi(argv[1]);
477381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        if (argc > 2) {
478381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            root = atoi(argv[2]);
47904351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes            if (argc > 3)
48004351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes                max = atoi(argv[3]);
48104351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes        }
482381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
483381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (argc > 4 || syms < 2 || root < 1 || max < 1) {
484381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        fputs("invalid arguments, need: [sym >= 2 [root >= 1 [max >= 1]]]\n",
48504351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes              stderr);
486381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return 1;
487381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
488381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
489381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* if not restricting the code length, the longest is syms - 1 */
490381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (max > syms - 1)
491381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        max = syms - 1;
492381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
493381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* determine the number of bits in a code_t */
49404351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes    for (n = 0, word = 1; word; n++, word <<= 1)
49504351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes        ;
496381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
497381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* make sure that the calculation of most will not overflow */
49804351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes    if (max > n || (code_t)(syms - 2) >= (((code_t)0 - 1) >> (max - 1))) {
499381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        fputs("abort: code length too long for internal types\n", stderr);
500381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return 1;
501381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
502381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
503381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* reject impossible code requests */
50404351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes    if ((code_t)(syms - 1) > ((code_t)1 << max) - 1) {
505381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        fprintf(stderr, "%d symbols cannot be coded in %d bits\n",
506381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                syms, max);
507381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return 1;
508381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
509381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
510381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* allocate code vector */
511381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    code = calloc(max + 1, sizeof(int));
512381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (code == NULL) {
513381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        fputs("abort: unable to allocate enough memory\n", stderr);
514381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return 1;
515381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
516381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
517381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* determine size of saved results array, checking for overflows,
518381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes       allocate and clear the array (set all to zero with calloc()) */
519381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (syms == 2)              /* iff max == 1 */
520381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        num = NULL;             /* won't be saving any results */
521381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    else {
522381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        size = syms >> 1;
523381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        if (size > ((size_t)0 - 1) / (n = (syms - 1) >> 1) ||
524381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                (size *= n, size > ((size_t)0 - 1) / (n = max - 1)) ||
525381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                (size *= n, size > ((size_t)0 - 1) / sizeof(big_t)) ||
526381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes                (num = calloc(size, sizeof(big_t))) == NULL) {
527381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            fputs("abort: unable to allocate enough memory\n", stderr);
528381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            cleanup();
529381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            return 1;
530381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
531381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
532381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
533381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* count possible codes for all numbers of symbols, add up counts */
534381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    sum = 0;
535381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    for (n = 2; n <= syms; n++) {
536381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        got = count(n, 1, 2);
537381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        sum += got;
53804351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes        if (got == (big_t)0 - 1 || sum < got) {     /* overflow */
539381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            fputs("abort: can't count that high!\n", stderr);
540381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            cleanup();
541381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes            return 1;
542381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        }
543381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        printf("%llu %d-codes\n", got, n);
544381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
545381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    printf("%llu total codes for 2 to %d symbols", sum, syms);
546381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (max < syms - 1)
547381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        printf(" (%d-bit length limit)\n", max);
548381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    else
549381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        puts(" (no length limit)");
550381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
551381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* allocate and clear done array for beenhere() */
552381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    if (syms == 2)
553381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        done = NULL;
554381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    else if (size > ((size_t)0 - 1) / sizeof(struct tab) ||
555381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes             (done = calloc(size, sizeof(struct tab))) == NULL) {
556381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        fputs("abort: unable to allocate enough memory\n", stderr);
557381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        cleanup();
558381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        return 1;
559381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    }
560381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
561381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* find and show maximum inflate table usage */
56204351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes    if (root > max)                 /* reduce root to max length */
56304351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes        root = max;
56404351a92ecc8429c999acbfc5dfe5aa8bee1d19dElliott Hughes    if ((code_t)syms < ((code_t)1 << (root + 1)))
565381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        enough(syms);
566381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    else
567381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes        puts("cannot handle minimum code lengths > root");
568381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes
569381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    /* done */
570381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    cleanup();
571381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes    return 0;
572381716e9396b55b1adb8235b020c37344f60ab07Elliott Hughes}
573