1/* gzread.c -- zlib functions for reading gzip files
2 * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013 Mark Adler
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6#include "gzguts.h"
7
8/* Local functions */
9local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10local int gz_avail OF((gz_statep));
11local int gz_look OF((gz_statep));
12local int gz_decomp OF((gz_statep));
13local int gz_fetch OF((gz_statep));
14local int gz_skip OF((gz_statep, z_off64_t));
15
16/* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
17   state->fd, and update state->eof, state->err, and state->msg as appropriate.
18   This function needs to loop on read(), since read() is not guaranteed to
19   read the number of bytes requested, depending on the type of descriptor. */
20local int gz_load(state, buf, len, have)
21    gz_statep state;
22    unsigned char *buf;
23    unsigned len;
24    unsigned *have;
25{
26    int ret;
27
28    *have = 0;
29    do {
30        ret = read(state->fd, buf + *have, len - *have);
31        if (ret <= 0)
32            break;
33        *have += ret;
34    } while (*have < len);
35    if (ret < 0) {
36        gz_error(state, Z_ERRNO, zstrerror());
37        return -1;
38    }
39    if (ret == 0)
40        state->eof = 1;
41    return 0;
42}
43
44/* Load up input buffer and set eof flag if last data loaded -- return -1 on
45   error, 0 otherwise.  Note that the eof flag is set when the end of the input
46   file is reached, even though there may be unused data in the buffer.  Once
47   that data has been used, no more attempts will be made to read the file.
48   If strm->avail_in != 0, then the current data is moved to the beginning of
49   the input buffer, and then the remainder of the buffer is loaded with the
50   available data from the input file. */
51local int gz_avail(state)
52    gz_statep state;
53{
54    unsigned got;
55    z_streamp strm = &(state->strm);
56
57    if (state->err != Z_OK && state->err != Z_BUF_ERROR)
58        return -1;
59    if (state->eof == 0) {
60        if (strm->avail_in) {       /* copy what's there to the start */
61            unsigned char *p = state->in;
62            unsigned const char *q = strm->next_in;
63            unsigned n = strm->avail_in;
64            do {
65                *p++ = *q++;
66            } while (--n);
67        }
68        if (gz_load(state, state->in + strm->avail_in,
69                    state->size - strm->avail_in, &got) == -1)
70            return -1;
71        strm->avail_in += got;
72        strm->next_in = state->in;
73    }
74    return 0;
75}
76
77/* Look for gzip header, set up for inflate or copy.  state->x.have must be 0.
78   If this is the first time in, allocate required memory.  state->how will be
79   left unchanged if there is no more input data available, will be set to COPY
80   if there is no gzip header and direct copying will be performed, or it will
81   be set to GZIP for decompression.  If direct copying, then leftover input
82   data from the input buffer will be copied to the output buffer.  In that
83   case, all further file reads will be directly to either the output buffer or
84   a user buffer.  If decompressing, the inflate state will be initialized.
85   gz_look() will return 0 on success or -1 on failure. */
86local int gz_look(state)
87    gz_statep state;
88{
89    z_streamp strm = &(state->strm);
90
91    /* allocate read buffers and inflate memory */
92    if (state->size == 0) {
93        /* allocate buffers */
94        state->in = (unsigned char *)malloc(state->want);
95        state->out = (unsigned char *)malloc(state->want << 1);
96        if (state->in == NULL || state->out == NULL) {
97            if (state->out != NULL)
98                free(state->out);
99            if (state->in != NULL)
100                free(state->in);
101            gz_error(state, Z_MEM_ERROR, "out of memory");
102            return -1;
103        }
104        state->size = state->want;
105
106        /* allocate inflate memory */
107        state->strm.zalloc = Z_NULL;
108        state->strm.zfree = Z_NULL;
109        state->strm.opaque = Z_NULL;
110        state->strm.avail_in = 0;
111        state->strm.next_in = Z_NULL;
112        if (inflateInit2(&(state->strm), 15 + 16) != Z_OK) {    /* gunzip */
113            free(state->out);
114            free(state->in);
115            state->size = 0;
116            gz_error(state, Z_MEM_ERROR, "out of memory");
117            return -1;
118        }
119    }
120
121    /* get at least the magic bytes in the input buffer */
122    if (strm->avail_in < 2) {
123        if (gz_avail(state) == -1)
124            return -1;
125        if (strm->avail_in == 0)
126            return 0;
127    }
128
129    /* look for gzip magic bytes -- if there, do gzip decoding (note: there is
130       a logical dilemma here when considering the case of a partially written
131       gzip file, to wit, if a single 31 byte is written, then we cannot tell
132       whether this is a single-byte file, or just a partially written gzip
133       file -- for here we assume that if a gzip file is being written, then
134       the header will be written in a single operation, so that reading a
135       single byte is sufficient indication that it is not a gzip file) */
136    if (strm->avail_in > 1 &&
137            strm->next_in[0] == 31 && strm->next_in[1] == 139) {
138        inflateReset(strm);
139        state->how = GZIP;
140        state->direct = 0;
141        return 0;
142    }
143
144    /* no gzip header -- if we were decoding gzip before, then this is trailing
145       garbage.  Ignore the trailing garbage and finish. */
146    if (state->direct == 0) {
147        strm->avail_in = 0;
148        state->eof = 1;
149        state->x.have = 0;
150        return 0;
151    }
152
153    /* doing raw i/o, copy any leftover input to output -- this assumes that
154       the output buffer is larger than the input buffer, which also assures
155       space for gzungetc() */
156    state->x.next = state->out;
157    if (strm->avail_in) {
158        memcpy(state->x.next, strm->next_in, strm->avail_in);
159        state->x.have = strm->avail_in;
160        strm->avail_in = 0;
161    }
162    state->how = COPY;
163    state->direct = 1;
164    return 0;
165}
166
167/* Decompress from input to the provided next_out and avail_out in the state.
168   On return, state->x.have and state->x.next point to the just decompressed
169   data.  If the gzip stream completes, state->how is reset to LOOK to look for
170   the next gzip stream or raw data, once state->x.have is depleted.  Returns 0
171   on success, -1 on failure. */
172local int gz_decomp(state)
173    gz_statep state;
174{
175    int ret = Z_OK;
176    unsigned had;
177    z_streamp strm = &(state->strm);
178
179    /* fill output buffer up to end of deflate stream */
180    had = strm->avail_out;
181    do {
182        /* get more input for inflate() */
183        if (strm->avail_in == 0 && gz_avail(state) == -1)
184            return -1;
185        if (strm->avail_in == 0) {
186            gz_error(state, Z_BUF_ERROR, "unexpected end of file");
187            break;
188        }
189
190        /* decompress and handle errors */
191        ret = inflate(strm, Z_NO_FLUSH);
192        if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
193            gz_error(state, Z_STREAM_ERROR,
194                     "internal error: inflate stream corrupt");
195            return -1;
196        }
197        if (ret == Z_MEM_ERROR) {
198            gz_error(state, Z_MEM_ERROR, "out of memory");
199            return -1;
200        }
201        if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
202            gz_error(state, Z_DATA_ERROR,
203                     strm->msg == NULL ? "compressed data error" : strm->msg);
204            return -1;
205        }
206    } while (strm->avail_out && ret != Z_STREAM_END);
207
208    /* update available output */
209    state->x.have = had - strm->avail_out;
210    state->x.next = strm->next_out - state->x.have;
211
212    /* if the gzip stream completed successfully, look for another */
213    if (ret == Z_STREAM_END)
214        state->how = LOOK;
215
216    /* good decompression */
217    return 0;
218}
219
220/* Fetch data and put it in the output buffer.  Assumes state->x.have is 0.
221   Data is either copied from the input file or decompressed from the input
222   file depending on state->how.  If state->how is LOOK, then a gzip header is
223   looked for to determine whether to copy or decompress.  Returns -1 on error,
224   otherwise 0.  gz_fetch() will leave state->how as COPY or GZIP unless the
225   end of the input file has been reached and all data has been processed.  */
226local int gz_fetch(state)
227    gz_statep state;
228{
229    z_streamp strm = &(state->strm);
230
231    do {
232        switch(state->how) {
233        case LOOK:      /* -> LOOK, COPY (only if never GZIP), or GZIP */
234            if (gz_look(state) == -1)
235                return -1;
236            if (state->how == LOOK)
237                return 0;
238            break;
239        case COPY:      /* -> COPY */
240            if (gz_load(state, state->out, state->size << 1, &(state->x.have))
241                    == -1)
242                return -1;
243            state->x.next = state->out;
244            return 0;
245        case GZIP:      /* -> GZIP or LOOK (if end of gzip stream) */
246            strm->avail_out = state->size << 1;
247            strm->next_out = state->out;
248            if (gz_decomp(state) == -1)
249                return -1;
250        }
251    } while (state->x.have == 0 && (!state->eof || strm->avail_in));
252    return 0;
253}
254
255/* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
256local int gz_skip(state, len)
257    gz_statep state;
258    z_off64_t len;
259{
260    unsigned n;
261
262    /* skip over len bytes or reach end-of-file, whichever comes first */
263    while (len)
264        /* skip over whatever is in output buffer */
265        if (state->x.have) {
266            n = GT_OFF(state->x.have) || (z_off64_t)state->x.have > len ?
267                (unsigned)len : state->x.have;
268            state->x.have -= n;
269            state->x.next += n;
270            state->x.pos += n;
271            len -= n;
272        }
273
274        /* output buffer empty -- return if we're at the end of the input */
275        else if (state->eof && state->strm.avail_in == 0)
276            break;
277
278        /* need more data to skip -- load up output buffer */
279        else {
280            /* get more output, looking for header if required */
281            if (gz_fetch(state) == -1)
282                return -1;
283        }
284    return 0;
285}
286
287/* -- see zlib.h -- */
288int ZEXPORT gzread(file, buf, len)
289    gzFile file;
290    voidp buf;
291    unsigned len;
292{
293    unsigned got, n;
294    gz_statep state;
295    z_streamp strm;
296
297    /* get internal structure */
298    if (file == NULL)
299        return -1;
300    state = (gz_statep)file;
301    strm = &(state->strm);
302
303    /* check that we're reading and that there's no (serious) error */
304    if (state->mode != GZ_READ ||
305            (state->err != Z_OK && state->err != Z_BUF_ERROR))
306        return -1;
307
308    /* since an int is returned, make sure len fits in one, otherwise return
309       with an error (this avoids the flaw in the interface) */
310    if ((int)len < 0) {
311        gz_error(state, Z_DATA_ERROR, "requested length does not fit in int");
312        return -1;
313    }
314
315    /* if len is zero, avoid unnecessary operations */
316    if (len == 0)
317        return 0;
318
319    /* process a skip request */
320    if (state->seek) {
321        state->seek = 0;
322        if (gz_skip(state, state->skip) == -1)
323            return -1;
324    }
325
326    /* get len bytes to buf, or less than len if at the end */
327    got = 0;
328    do {
329        /* first just try copying data from the output buffer */
330        if (state->x.have) {
331            n = state->x.have > len ? len : state->x.have;
332            memcpy(buf, state->x.next, n);
333            state->x.next += n;
334            state->x.have -= n;
335        }
336
337        /* output buffer empty -- return if we're at the end of the input */
338        else if (state->eof && strm->avail_in == 0) {
339            state->past = 1;        /* tried to read past end */
340            break;
341        }
342
343        /* need output data -- for small len or new stream load up our output
344           buffer */
345        else if (state->how == LOOK || len < (state->size << 1)) {
346            /* get more output, looking for header if required */
347            if (gz_fetch(state) == -1)
348                return -1;
349            continue;       /* no progress yet -- go back to copy above */
350            /* the copy above assures that we will leave with space in the
351               output buffer, allowing at least one gzungetc() to succeed */
352        }
353
354        /* large len -- read directly into user buffer */
355        else if (state->how == COPY) {      /* read directly */
356            if (gz_load(state, (unsigned char *)buf, len, &n) == -1)
357                return -1;
358        }
359
360        /* large len -- decompress directly into user buffer */
361        else {  /* state->how == GZIP */
362            strm->avail_out = len;
363            strm->next_out = (unsigned char *)buf;
364            if (gz_decomp(state) == -1)
365                return -1;
366            n = state->x.have;
367            state->x.have = 0;
368        }
369
370        /* update progress */
371        len -= n;
372        buf = (char *)buf + n;
373        got += n;
374        state->x.pos += n;
375    } while (len);
376
377    /* return number of bytes read into user buffer (will fit in int) */
378    return (int)got;
379}
380
381/* -- see zlib.h -- */
382#ifdef Z_PREFIX_SET
383#  undef z_gzgetc
384#else
385#  undef gzgetc
386#endif
387int ZEXPORT gzgetc(file)
388    gzFile file;
389{
390    int ret;
391    unsigned char buf[1];
392    gz_statep state;
393
394    /* get internal structure */
395    if (file == NULL)
396        return -1;
397    state = (gz_statep)file;
398
399    /* check that we're reading and that there's no (serious) error */
400    if (state->mode != GZ_READ ||
401        (state->err != Z_OK && state->err != Z_BUF_ERROR))
402        return -1;
403
404    /* try output buffer (no need to check for skip request) */
405    if (state->x.have) {
406        state->x.have--;
407        state->x.pos++;
408        return *(state->x.next)++;
409    }
410
411    /* nothing there -- try gzread() */
412    ret = gzread(file, buf, 1);
413    return ret < 1 ? -1 : buf[0];
414}
415
416int ZEXPORT gzgetc_(file)
417gzFile file;
418{
419    return gzgetc(file);
420}
421
422/* -- see zlib.h -- */
423int ZEXPORT gzungetc(c, file)
424    int c;
425    gzFile file;
426{
427    gz_statep state;
428
429    /* get internal structure */
430    if (file == NULL)
431        return -1;
432    state = (gz_statep)file;
433
434    /* check that we're reading and that there's no (serious) error */
435    if (state->mode != GZ_READ ||
436        (state->err != Z_OK && state->err != Z_BUF_ERROR))
437        return -1;
438
439    /* process a skip request */
440    if (state->seek) {
441        state->seek = 0;
442        if (gz_skip(state, state->skip) == -1)
443            return -1;
444    }
445
446    /* can't push EOF */
447    if (c < 0)
448        return -1;
449
450    /* if output buffer empty, put byte at end (allows more pushing) */
451    if (state->x.have == 0) {
452        state->x.have = 1;
453        state->x.next = state->out + (state->size << 1) - 1;
454        state->x.next[0] = c;
455        state->x.pos--;
456        state->past = 0;
457        return c;
458    }
459
460    /* if no room, give up (must have already done a gzungetc()) */
461    if (state->x.have == (state->size << 1)) {
462        gz_error(state, Z_DATA_ERROR, "out of room to push characters");
463        return -1;
464    }
465
466    /* slide output data if needed and insert byte before existing data */
467    if (state->x.next == state->out) {
468        unsigned char *src = state->out + state->x.have;
469        unsigned char *dest = state->out + (state->size << 1);
470        while (src > state->out)
471            *--dest = *--src;
472        state->x.next = dest;
473    }
474    state->x.have++;
475    state->x.next--;
476    state->x.next[0] = c;
477    state->x.pos--;
478    state->past = 0;
479    return c;
480}
481
482/* -- see zlib.h -- */
483char * ZEXPORT gzgets(file, buf, len)
484    gzFile file;
485    char *buf;
486    int len;
487{
488    unsigned left, n;
489    char *str;
490    unsigned char *eol;
491    gz_statep state;
492
493    /* check parameters and get internal structure */
494    if (file == NULL || buf == NULL || len < 1)
495        return NULL;
496    state = (gz_statep)file;
497
498    /* check that we're reading and that there's no (serious) error */
499    if (state->mode != GZ_READ ||
500        (state->err != Z_OK && state->err != Z_BUF_ERROR))
501        return NULL;
502
503    /* process a skip request */
504    if (state->seek) {
505        state->seek = 0;
506        if (gz_skip(state, state->skip) == -1)
507            return NULL;
508    }
509
510    /* copy output bytes up to new line or len - 1, whichever comes first --
511       append a terminating zero to the string (we don't check for a zero in
512       the contents, let the user worry about that) */
513    str = buf;
514    left = (unsigned)len - 1;
515    if (left) do {
516        /* assure that something is in the output buffer */
517        if (state->x.have == 0 && gz_fetch(state) == -1)
518            return NULL;                /* error */
519        if (state->x.have == 0) {       /* end of file */
520            state->past = 1;            /* read past end */
521            break;                      /* return what we have */
522        }
523
524        /* look for end-of-line in current output buffer */
525        n = state->x.have > left ? left : state->x.have;
526        eol = (unsigned char *)memchr(state->x.next, '\n', n);
527        if (eol != NULL)
528            n = (unsigned)(eol - state->x.next) + 1;
529
530        /* copy through end-of-line, or remainder if not found */
531        memcpy(buf, state->x.next, n);
532        state->x.have -= n;
533        state->x.next += n;
534        state->x.pos += n;
535        left -= n;
536        buf += n;
537    } while (left && eol == NULL);
538
539    /* return terminated string, or if nothing, end of file */
540    if (buf == str)
541        return NULL;
542    buf[0] = 0;
543    return str;
544}
545
546/* -- see zlib.h -- */
547int ZEXPORT gzdirect(file)
548    gzFile file;
549{
550    gz_statep state;
551
552    /* get internal structure */
553    if (file == NULL)
554        return 0;
555    state = (gz_statep)file;
556
557    /* if the state is not known, but we can find out, then do so (this is
558       mainly for right after a gzopen() or gzdopen()) */
559    if (state->mode == GZ_READ && state->how == LOOK && state->x.have == 0)
560        (void)gz_look(state);
561
562    /* return 1 if transparent, 0 if processing a gzip stream */
563    return state->direct;
564}
565
566/* -- see zlib.h -- */
567int ZEXPORT gzclose_r(file)
568    gzFile file;
569{
570    int ret, err;
571    gz_statep state;
572
573    /* get internal structure */
574    if (file == NULL)
575        return Z_STREAM_ERROR;
576    state = (gz_statep)file;
577
578    /* check that we're reading */
579    if (state->mode != GZ_READ)
580        return Z_STREAM_ERROR;
581
582    /* free memory and close file */
583    if (state->size) {
584        inflateEnd(&(state->strm));
585        free(state->out);
586        free(state->in);
587    }
588    err = state->err == Z_BUF_ERROR ? Z_BUF_ERROR : Z_OK;
589    gz_error(state, Z_OK, NULL);
590    free(state->path);
591    ret = close(state->fd);
592    free(state);
593    return ret ? Z_ERRNO : err;
594}
595