gzread.c revision 00fb66d0b1488e4ca655906f82b27eb2ccc3e309
1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004, 2005, 2010, 2011 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "gzguts.h"
7
/* Local functions */
/* gz_load: loop on read() to fill a caller-supplied buffer */
local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
/* gz_avail: refill the inflate input buffer from the file */
local int gz_avail OF((gz_statep));
/* gz_look: detect a gzip header and select GZIP or COPY handling */
local int gz_look OF((gz_statep));
/* gz_decomp: inflate into the currently configured output area */
local int gz_decomp OF((gz_statep));
/* gz_fetch: produce output data by looking, copying or decompressing */
local int gz_fetch OF((gz_statep));
/* gz_skip: discard a given number of uncompressed output bytes */
local int gz_skip OF((gz_statep, z_off64_t));
15
16/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
17   state->fd, and update state->eof, state->err, and state->msg as appropriate.
18   This function needs to loop on read(), since read() is not guaranteed to
19   read the number of bytes requested, depending on the type of descriptor. */
20local int gz_load(state, buf, len, have)
21    gz_statep state;
22    unsigned char *buf;
23    unsigned len;
24    unsigned *have;
25{
26    int ret;
27
28    *have = 0;
29    do {
30        ret = read(state->fd, buf + *have, len - *have);
31        if (ret <= 0)
32            break;
33        *have += ret;
34    } while (*have < len);
35    if (ret < 0) {
36        gz_error(state, Z_ERRNO, zstrerror());
37        return -1;
38    }
39    if (ret == 0)
40        state->eof = 1;
41    return 0;
42}
43
44/* Load up input buffer and set eof flag if last data loaded -- return -1 on
45   error, 0 otherwise.  Note that the eof flag is set when the end of the input
46   file is reached, even though there may be unused data in the buffer.  Once
47   that data has been used, no more attempts will be made to read the file.
48   If strm->avail_in != 0, then the current data is moved to the beginning of
49   the input buffer, and then the remainder of the buffer is loaded with the
50   available data from the input file. */
51local int gz_avail(state)
52    gz_statep state;
53{
54    unsigned got;
55    z_streamp strm = &(state->strm);
56
57    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
58        return -1;
59    if (state->eof == 0) {
60        if (strm->avail_in)
61            memmove(state->in, strm->next_in, strm->avail_in);
62        if (gz_load(state, state->in + strm->avail_in,
63                    state->size - strm->avail_in, &got) == -1)
64            return -1;
65        strm->avail_in += got;
66        strm->next_in = state->in;
67    }
68    return 0;
69}
70
71/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
72   If this is the first time in, allocate required memory.  state->how will be
73   left unchanged if there is no more input data available, will be set to COPY
74   if there is no gzip header and direct copying will be performed, or it will
75   be set to GZIP for decompression.  If direct copying, then leftover input
76   data from the input buffer will be copied to the output buffer.  In that
77   case, all further file reads will be directly to either the output buffer or
78   a user buffer.  If decompressing, the inflate state will be initialized.
79   gz_look() will return 0 on success or -1 on failure. */
80local int gz_look(state)
81    gz_statep state;
82{
83    z_streamp strm = &(state->strm);
84
85    /* allocate read buffers and inflate memory */
86    if (state->size == 0) {
87        /* allocate buffers */
88        state->in = malloc(state->want);
89        state->out = malloc(state->want << 1);
90        if (state->in == NULL || state->out == NULL) {
91            if (state->out != NULL)
92                free(state->out);
93            if (state->in != NULL)
94                free(state->in);
95            gz_error(state, Z_MEM_ERROR, "out of memory");
96            return -1;
97        }
98        state->size = state->want;
99
100        /* allocate inflate memory */
101        state->strm.zalloc = Z_NULL;
102        state->strm.zfree = Z_NULL;
103        state->strm.opaque = Z_NULL;
104        state->strm.avail_in = 0;
105        state->strm.next_in = Z_NULL;
106        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
107            free(state->out);
108            free(state->in);
109            state->size = 0;
110            gz_error(state, Z_MEM_ERROR, "out of memory");
111            return -1;
112        }
113    }
114
115    /* get at least the magic bytes in the input buffer */
116    if (strm->avail_in < 2) {
117        if (gz_avail(state) == -1)
118            return -1;
119        if (strm->avail_in == 0)
120            return 0;
121    }
122
123    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
124       a logical dilemma here when considering the case of a partially written
125       gzip file, to wit, if a single 31 byte is written, then we cannot tell
126       whether this is a single-byte file, or just a partially written gzip
127       file -- for here we assume that if a gzip file is being written, then
128       the header will be written in a single operation, so that reading a
129       single byte is sufficient indication that it is not a gzip file) */
130    if (strm->avail_in > 1 &&
131            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
132        inflateReset(strm);
133        state->how = GZIP;
134        state->direct = 0;
135        return 0;
136    }
137
138    /* no gzip header -- if we were decoding gzip before, then this is trailing
139       garbage.  Ignore the trailing garbage and finish. */
140    if (state->direct == 0) {
141        strm->avail_in = 0;
142        state->eof = 1;
143        state->x.have = 0;
144        return 0;
145    }
146
147    /* doing raw i/o, copy any leftover input to output -- this assumes that
148       the output buffer is larger than the input buffer, which also assures
149       space for gzungetc() */
150    state->x.next = state->out;
151    if (strm->avail_in) {
152        memcpy(state->x.next, strm->next_in, strm->avail_in);
153        state->x.have = strm->avail_in;
154        strm->avail_in = 0;
155    }
156    state->how = COPY;
157    state->direct = 1;
158    return 0;
159}
160
161/* Decompress from input to the provided next_out and avail_out in the state.
162   On return, state->x.have and state->x.next point to the just decompressed
163   data.  If the gzip stream completes, state->how is reset to LOOK to look for
164   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
165   on success, -1 on failure. */
166local int gz_decomp(state)
167    gz_statep state;
168{
169    int ret = Z_OK;
170    unsigned had;
171    z_streamp strm = &(state->strm);
172
173    /* fill output buffer up to end of deflate stream */
174    had = strm->avail_out;
175    do {
176        /* get more input for inflate() */
177        if (strm->avail_in == 0 && gz_avail(state) == -1)
178            return -1;
179        if (strm->avail_in == 0) {
180            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
181            break;
182        }
183
184        /* decompress and handle errors */
185        ret = inflate(strm, Z_NO_FLUSH);
186        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
187            gz_error(state, Z_STREAM_ERROR,
188                     "internal error: inflate stream corrupt");
189            return -1;
190        }
191        if (ret == Z_MEM_ERROR) {
192            gz_error(state, Z_MEM_ERROR, "out of memory");
193            return -1;
194        }
195        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
196            gz_error(state, Z_DATA_ERROR,
197                     strm->msg == NULL ? "compressed data error" : strm->msg);
198            return -1;
199        }
200    } while (strm->avail_out && ret != Z_STREAM_END);
201
202    /* update available output */
203    state->x.have = had - strm->avail_out;
204    state->x.next = strm->next_out - state->x.have;
205
206    /* if the gzip stream completed successfully, look for another */
207    if (ret == Z_STREAM_END)
208        state->how = LOOK;
209
210    /* good decompression */
211    return 0;
212}
213
214/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
215   Data is either copied from the input file or decompressed from the input
216   file depending on state->how.  If state->how is LOOK, then a gzip header is
217   looked for to determine whether to copy or decompress.  Returns -1 on error,
218   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
219   end of the input file has been reached and all data has been processed.  */
220local int gz_fetch(state)
221    gz_statep state;
222{
223    z_streamp strm = &(state->strm);
224
225    do {
226        switch(state->how) {
227        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
228            if (gz_look(state) == -1)
229                return -1;
230            if (state->how == LOOK)
231                return 0;
232            break;
233        case COPY:      /* -> COPY */
234            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
235                    == -1)
236                return -1;
237            state->x.next = state->out;
238            return 0;
239        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
240            strm->avail_out = state->size << 1;
241            strm->next_out = state->out;
242            if (gz_decomp(state) == -1)
243                return -1;
244        }
245    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
246    return 0;
247}
248
249/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
250local int gz_skip(state, len)
251    gz_statep state;
252    z_off64_t len;
253{
254    unsigned n;
255
256    /* skip over len bytes or reach end-of-file, whichever comes first */
257    while (len)
258        /* skip over whatever is in output buffer */
259        if (state->x.have) {
260            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
261                (unsigned)len : state->x.have;
262            state->x.have -= n;
263            state->x.next += n;
264            state->x.pos += n;
265            len -= n;
266        }
267
268        /* output buffer empty -- return if we're at the end of the input */
269        else if (state->eof && state->strm.avail_in == 0)
270            break;
271
272        /* need more data to skip -- load up output buffer */
273        else {
274            /* get more output, looking for header if required */
275            if (gz_fetch(state) == -1)
276                return -1;
277        }
278    return 0;
279}
280
281/* -- see zlib.h -- */
282int ZEXPORT gzread(file, buf, len)
283    gzFile file;
284    voidp buf;
285    unsigned len;
286{
287    unsigned got, n;
288    gz_statep state;
289    z_streamp strm;
290
291    /* get internal structure */
292    if (file == NULL)
293        return -1;
294    state = (gz_statep)file;
295    strm = &(state->strm);
296
297    /* check that we're reading and that there's no (serious) error */
298    if (state->mode != GZ_READ ||
299            (state->err != Z_OK && state->err != Z_BUF_ERROR))
300        return -1;
301
302    /* since an int is returned, make sure len fits in one, otherwise return
303       with an error (this avoids the flaw in the interface) */
304    if ((int)len < 0) {
305        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
306        return -1;
307    }
308
309    /* if len is zero, avoid unnecessary operations */
310    if (len == 0)
311        return 0;
312
313    /* process a skip request */
314    if (state->seek) {
315        state->seek = 0;
316        if (gz_skip(state, state->skip) == -1)
317            return -1;
318    }
319
320    /* get len bytes to buf, or less than len if at the end */
321    got = 0;
322    do {
323        /* first just try copying data from the output buffer */
324        if (state->x.have) {
325            n = state->x.have > len ? len : state->x.have;
326            memcpy(buf, state->x.next, n);
327            state->x.next += n;
328            state->x.have -= n;
329        }
330
331        /* output buffer empty -- return if we're at the end of the input */
332        else if (state->eof && strm->avail_in == 0) {
333            state->past = 1;        /* tried to read past end */
334            break;
335        }
336
337        /* need output data -- for small len or new stream load up our output
338           buffer */
339        else if (state->how == LOOK || len < (state->size << 1)) {
340            /* get more output, looking for header if required */
341            if (gz_fetch(state) == -1)
342                return -1;
343            continue;       /* no progress yet -- go back to memcpy() above */
344            /* the copy above assures that we will leave with space in the
345               output buffer, allowing at least one gzungetc() to succeed */
346        }
347
348        /* large len -- read directly into user buffer */
349        else if (state->how == COPY) {      /* read directly */
350            if (gz_load(state, buf, len, &n) == -1)
351                return -1;
352        }
353
354        /* large len -- decompress directly into user buffer */
355        else {  /* state->how == GZIP */
356            strm->avail_out = len;
357            strm->next_out = buf;
358            if (gz_decomp(state) == -1)
359                return -1;
360            n = state->x.have;
361            state->x.have = 0;
362        }
363
364        /* update progress */
365        len -= n;
366        buf = (char *)buf + n;
367        got += n;
368        state->x.pos += n;
369    } while (len);
370
371    /* return number of bytes read into user buffer (will fit in int) */
372    return (int)got;
373}
374
375/* -- see zlib.h -- */
376int ZEXPORT gzgetc_(file)
377    gzFile file;
378{
379    int ret;
380    unsigned char buf[1];
381    gz_statep state;
382
383    /* get internal structure */
384    if (file == NULL)
385        return -1;
386    state = (gz_statep)file;
387
388    /* check that we're reading and that there's no (serious) error */
389    if (state->mode != GZ_READ ||
390        (state->err != Z_OK && state->err != Z_BUF_ERROR))
391        return -1;
392
393    /* try output buffer (no need to check for skip request) */
394    if (state->x.have) {
395        state->x.have--;
396        state->x.pos++;
397        return *(state->x.next)++;
398    }
399
400    /* nothing there -- try gzread() */
401    ret = gzread(file, buf, 1);
402    return ret < 1 ? -1 : buf[0];
403}
404
405#undef gzgetc
406int ZEXPORT gzgetc(file)
407gzFile file;
408{
409    return gzgetc_(file);
410}
411
412/* -- see zlib.h -- */
413int ZEXPORT gzungetc(c, file)
414    int c;
415    gzFile file;
416{
417    gz_statep state;
418
419    /* get internal structure */
420    if (file == NULL)
421        return -1;
422    state = (gz_statep)file;
423
424    /* check that we're reading and that there's no (serious) error */
425    if (state->mode != GZ_READ ||
426        (state->err != Z_OK && state->err != Z_BUF_ERROR))
427        return -1;
428
429    /* process a skip request */
430    if (state->seek) {
431        state->seek = 0;
432        if (gz_skip(state, state->skip) == -1)
433            return -1;
434    }
435
436    /* can't push EOF */
437    if (c < 0)
438        return -1;
439
440    /* if output buffer empty, put byte at end (allows more pushing) */
441    if (state->x.have == 0) {
442        state->x.have = 1;
443        state->x.next = state->out + (state->size << 1) - 1;
444        state->x.next[0] = c;
445        state->x.pos--;
446        state->past = 0;
447        return c;
448    }
449
450    /* if no room, give up (must have already done a gzungetc()) */
451    if (state->x.have == (state->size << 1)) {
452        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
453        return -1;
454    }
455
456    /* slide output data if needed and insert byte before existing data */
457    if (state->x.next == state->out) {
458        unsigned char *src = state->out + state->x.have;
459        unsigned char *dest = state->out + (state->size << 1);
460        while (src > state->out)
461            *--dest = *--src;
462        state->x.next = dest;
463    }
464    state->x.have++;
465    state->x.next--;
466    state->x.next[0] = c;
467    state->x.pos--;
468    state->past = 0;
469    return c;
470}
471
472/* -- see zlib.h -- */
473char * ZEXPORT gzgets(file, buf, len)
474    gzFile file;
475    char *buf;
476    int len;
477{
478    unsigned left, n;
479    char *str;
480    unsigned char *eol;
481    gz_statep state;
482
483    /* check parameters and get internal structure */
484    if (file == NULL || buf == NULL || len < 1)
485        return NULL;
486    state = (gz_statep)file;
487
488    /* check that we're reading and that there's no (serious) error */
489    if (state->mode != GZ_READ ||
490        (state->err != Z_OK && state->err != Z_BUF_ERROR))
491        return NULL;
492
493    /* process a skip request */
494    if (state->seek) {
495        state->seek = 0;
496        if (gz_skip(state, state->skip) == -1)
497            return NULL;
498    }
499
500    /* copy output bytes up to new line or len - 1, whichever comes first --
501       append a terminating zero to the string (we don't check for a zero in
502       the contents, let the user worry about that) */
503    str = buf;
504    left = (unsigned)len - 1;
505    if (left) do {
506        /* assure that something is in the output buffer */
507        if (state->x.have == 0 && gz_fetch(state) == -1)
508            return NULL;                /* error */
509        if (state->x.have == 0) {       /* end of file */
510            state->past = 1;            /* read past end */
511            break;                      /* return what we have */
512        }
513
514        /* look for end-of-line in current output buffer */
515        n = state->x.have > left ? left : state->x.have;
516        eol = memchr(state->x.next, '\n', n);
517        if (eol != NULL)
518            n = (unsigned)(eol - state->x.next) + 1;
519
520        /* copy through end-of-line, or remainder if not found */
521        memcpy(buf, state->x.next, n);
522        state->x.have -= n;
523        state->x.next += n;
524        state->x.pos += n;
525        left -= n;
526        buf += n;
527    } while (left && eol == NULL);
528
529    /* return terminated string, or if nothing, end of file */
530    if (buf == str)
531        return NULL;
532    buf[0] = 0;
533    return str;
534}
535
536/* -- see zlib.h -- */
537int ZEXPORT gzdirect(file)
538    gzFile file;
539{
540    gz_statep state;
541
542    /* get internal structure */
543    if (file == NULL)
544        return 0;
545    state = (gz_statep)file;
546
547    /* if the state is not known, but we can find out, then do so (this is
548       mainly for right after a gzopen() or gzdopen()) */
549    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
550        (void)gz_look(state);
551
552    /* return 1 if transparent, 0 if processing a gzip stream */
553    return state->direct;
554}
555
556/* -- see zlib.h -- */
557int ZEXPORT gzclose_r(file)
558    gzFile file;
559{
560    int ret, err;
561    gz_statep state;
562
563    /* get internal structure */
564    if (file == NULL)
565        return Z_STREAM_ERROR;
566    state = (gz_statep)file;
567
568    /* check that we're reading */
569    if (state->mode != GZ_READ)
570        return Z_STREAM_ERROR;
571
572    /* free memory and close file */
573    if (state->size) {
574        inflateEnd(&(state->strm));
575        free(state->out);
576        free(state->in);
577    }
578    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
579    gz_error(state, Z_OK, NULL);
580    free(state->path);
581    ret = close(state->fd);
582    free(state);
583    return ret ? Z_ERRNO : err;
584}
585