1/* fitblk.c: example of fitting compressed output to a specified size
2   Not copyrighted -- provided to the public domain
3   Version 1.1  25 November 2004  Mark Adler */
4
5/* Version history:
6   1.0  24 Nov 2004  First version
7   1.1  25 Nov 2004  Change deflateInit2() to deflateInit()
8                     Use fixed-size, stack-allocated raw buffers
9                     Simplify code moving compression to subroutines
10                     Use assert() for internal errors
11                     Add detailed description of approach
12 */
13
14/* Approach to just fitting a requested compressed size:
15
16   fitblk performs three compression passes on a portion of the input
17   data in order to determine how much of that input will compress to
18   nearly the requested output block size.  The first pass generates
19   enough deflate blocks to produce output to fill the requested
   output size plus a specified excess amount (see the EXCESS define
21   below).  The last deflate block may go quite a bit past that, but
22   is discarded.  The second pass decompresses and recompresses just
23   the compressed data that fit in the requested plus excess sized
24   buffer.  The deflate process is terminated after that amount of
25   input, which is less than the amount consumed on the first pass.
26   The last deflate block of the result will be of a comparable size
27   to the final product, so that the header for that deflate block and
28   the compression ratio for that block will be about the same as in
29   the final product.  The third compression pass decompresses the
30   result of the second step, but only the compressed data up to the
31   requested size minus an amount to allow the compressed stream to
32   complete (see the MARGIN define below).  That will result in a
33   final compressed stream whose length is less than or equal to the
34   requested size.  Assuming sufficient input and a requested size
35   greater than a few hundred bytes, the shortfall will typically be
36   less than ten bytes.
37
38   If the input is short enough that the first compression completes
39   before filling the requested output size, then that compressed
   stream is returned with no recompression.
41
42   EXCESS is chosen to be just greater than the shortfall seen in a
43   two pass approach similar to the above.  That shortfall is due to
44   the last deflate block compressing more efficiently with a smaller
45   header on the second pass.  EXCESS is set to be large enough so
46   that there is enough uncompressed data for the second pass to fill
47   out the requested size, and small enough so that the final deflate
48   block of the second pass will be close in size to the final deflate
49   block of the third and final pass.  MARGIN is chosen to be just
50   large enough to assure that the final compression has enough room
51   to complete in all cases.
52 */
53
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <limits.h>
#include "zlib.h"
58
/* file-scope helpers are declared "local" to give them internal linkage */
#define local static

/* Print "fitblk abort: " followed by the message why and a newline to
   stderr, then terminate the process with exit status 1.  This function
   does not return.  The message is only read, never modified, so it is
   taken as a pointer to const; that lets callers pass string literals
   without discarding qualifiers. */
local void quit(const char *why)
{
    fprintf(stderr, "fitblk abort: %s\n", why);
    exit(1);
}
67
68#define RAWLEN 4096    /* intermediate uncompressed buffer size */
69
70/* compress from file to def until provided buffer is full or end of
71   input reached; return last deflate() return value, or Z_ERRNO if
72   there was read error on the file */
73local int partcompress(FILE *in, z_streamp def)
74{
75    int ret, flush;
76    unsigned char raw[RAWLEN];
77
78    flush = Z_NO_FLUSH;
79    do {
80        def->avail_in = fread(raw, 1, RAWLEN, in);
81        if (ferror(in))
82            return Z_ERRNO;
83        def->next_in = raw;
84        if (feof(in))
85            flush = Z_FINISH;
86        ret = deflate(def, flush);
87        assert(ret != Z_STREAM_ERROR);
88    } while (def->avail_out != 0 && flush == Z_NO_FLUSH);
89    return ret;
90}
91
92/* recompress from inf's input to def's output; the input for inf and
93   the output for def are set in those structures before calling;
94   return last deflate() return value, or Z_MEM_ERROR if inflate()
95   was not able to allocate enough memory when it needed to */
96local int recompress(z_streamp inf, z_streamp def)
97{
98    int ret, flush;
99    unsigned char raw[RAWLEN];
100
101    flush = Z_NO_FLUSH;
102    do {
103        /* decompress */
104        inf->avail_out = RAWLEN;
105        inf->next_out = raw;
106        ret = inflate(inf, Z_NO_FLUSH);
107        assert(ret != Z_STREAM_ERROR && ret != Z_DATA_ERROR &&
108               ret != Z_NEED_DICT);
109        if (ret == Z_MEM_ERROR)
110            return ret;
111
112        /* compress what was decompresed until done or no room */
113        def->avail_in = RAWLEN - inf->avail_out;
114        def->next_in = raw;
115        if (inf->avail_out != 0)
116            flush = Z_FINISH;
117        ret = deflate(def, flush);
118        assert(ret != Z_STREAM_ERROR);
119    } while (ret != Z_STREAM_END && def->avail_out != 0);
120    return ret;
121}
122
123#define EXCESS 256      /* empirically determined stream overage */
124#define MARGIN 8        /* amount to back off for completion */
125
126/* compress from stdin to fixed-size block on stdout */
127int main(int argc, char **argv)
128{
129    int ret;                /* return code */
130    unsigned size;          /* requested fixed output block size */
131    unsigned have;          /* bytes written by deflate() call */
132    unsigned char *blk;     /* intermediate and final stream */
133    unsigned char *tmp;     /* close to desired size stream */
134    z_stream def, inf;      /* zlib deflate and inflate states */
135
136    /* get requested output size */
137    if (argc != 2)
138        quit("need one argument: size of output block");
139    ret = strtol(argv[1], argv + 1, 10);
140    if (argv[1][0] != 0)
141        quit("argument must be a number");
142    if (ret < 8)            /* 8 is minimum zlib stream size */
143        quit("need positive size of 8 or greater");
144    size = (unsigned)ret;
145
146    /* allocate memory for buffers and compression engine */
147    blk = malloc(size + EXCESS);
148    def.zalloc = Z_NULL;
149    def.zfree = Z_NULL;
150    def.opaque = Z_NULL;
151    ret = deflateInit(&def, Z_DEFAULT_COMPRESSION);
152    if (ret != Z_OK || blk == NULL)
153        quit("out of memory");
154
155    /* compress from stdin until output full, or no more input */
156    def.avail_out = size + EXCESS;
157    def.next_out = blk;
158    ret = partcompress(stdin, &def);
159    if (ret == Z_ERRNO)
160        quit("error reading input");
161
162    /* if it all fit, then size was undersubscribed -- done! */
163    if (ret == Z_STREAM_END && def.avail_out >= EXCESS) {
164        /* write block to stdout */
165        have = size + EXCESS - def.avail_out;
166        if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
167            quit("error writing output");
168
169        /* clean up and print results to stderr */
170        ret = deflateEnd(&def);
171        assert(ret != Z_STREAM_ERROR);
172        free(blk);
173        fprintf(stderr,
174                "%u bytes unused out of %u requested (all input)\n",
175                size - have, size);
176        return 0;
177    }
178
179    /* it didn't all fit -- set up for recompression */
180    inf.zalloc = Z_NULL;
181    inf.zfree = Z_NULL;
182    inf.opaque = Z_NULL;
183    inf.avail_in = 0;
184    inf.next_in = Z_NULL;
185    ret = inflateInit(&inf);
186    tmp = malloc(size + EXCESS);
187    if (ret != Z_OK || tmp == NULL)
188        quit("out of memory");
189    ret = deflateReset(&def);
190    assert(ret != Z_STREAM_ERROR);
191
192    /* do first recompression close to the right amount */
193    inf.avail_in = size + EXCESS;
194    inf.next_in = blk;
195    def.avail_out = size + EXCESS;
196    def.next_out = tmp;
197    ret = recompress(&inf, &def);
198    if (ret == Z_MEM_ERROR)
199        quit("out of memory");
200
201    /* set up for next reocmpression */
202    ret = inflateReset(&inf);
203    assert(ret != Z_STREAM_ERROR);
204    ret = deflateReset(&def);
205    assert(ret != Z_STREAM_ERROR);
206
207    /* do second and final recompression (third compression) */
208    inf.avail_in = size - MARGIN;   /* assure stream will complete */
209    inf.next_in = tmp;
210    def.avail_out = size;
211    def.next_out = blk;
212    ret = recompress(&inf, &def);
213    if (ret == Z_MEM_ERROR)
214        quit("out of memory");
215    assert(ret == Z_STREAM_END);    /* otherwise MARGIN too small */
216
217    /* done -- write block to stdout */
218    have = size - def.avail_out;
219    if (fwrite(blk, 1, have, stdout) != have || ferror(stdout))
220        quit("error writing output");
221
222    /* clean up and print results to stderr */
223    free(tmp);
224    ret = inflateEnd(&inf);
225    assert(ret != Z_STREAM_ERROR);
226    ret = deflateEnd(&def);
227    assert(ret != Z_STREAM_ERROR);
228    free(blk);
229    fprintf(stderr,
230            "%u bytes unused out of %u requested (%lu input)\n",
231            size - have, size, def.total_in);
232    return 0;
233}
234