GZIPInputStream.java revision bb65253db600d05ca6eeedc6d076f6af20680f99
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.util.zip;
19
20import java.io.EOFException;
21import java.io.IOException;
22import java.io.InputStream;
23import java.io.PushbackInputStream;
24import java.nio.ByteOrder;
25import java.util.Arrays;
26import libcore.io.Memory;
27import libcore.io.Streams;
28
29/**
30 * The {@code GZIPInputStream} class is used to read data stored in the GZIP
31 * format, reading and decompressing GZIP data from the underlying stream into
32 * its buffer.
33 *
34 * <h3>Example</h3>
35 * <p>Using {@code GZIPInputStream} is easier than {@link ZipInputStream}
36 * because GZIP is only for compression, and is not a container for multiple files.
37 * This code decompresses the data from a GZIP stream, similar to the {@code gunzip(1)} utility.
38 * <pre>
39 * InputStream is = ...
40 * GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(is));
41 * try {
42 *     // Reading from 'zis' gets you the uncompressed bytes...
43 *     processStream(zis);
44 * } finally {
45 *     zis.close();
46 * }
47 * </pre>
48 *
49 * <p>Note that this class ignores all remaining data at the end of the last
50 * GZIP member.
51 */
52public class GZIPInputStream extends InflaterInputStream {
53    private static final int FCOMMENT = 16;
54
55    private static final int FEXTRA = 4;
56
57    private static final int FHCRC = 2;
58
59    private static final int FNAME = 8;
60
61    private static final int GZIP_TRAILER_SIZE = 8;
62
63    /**
64     * The magic header for the GZIP format.
65     */
66    public static final int GZIP_MAGIC = 0x8b1f;
67
68    /**
69     * The checksum algorithm used when handling uncompressed data.
70     */
71    protected CRC32 crc = new CRC32();
72
73    /**
74     * Indicates the end of the input stream.
75     */
76    protected boolean eos = false;
77
78    /**
79     * Construct a {@code GZIPInputStream} to read from GZIP data from the
80     * underlying stream.
81     *
82     * @param is
83     *            the {@code InputStream} to read data from.
84     * @throws IOException
85     *             if an {@code IOException} occurs.
86     */
87    public GZIPInputStream(InputStream is) throws IOException {
88        this(is, BUF_SIZE);
89    }
90
91    /**
92     * Construct a {@code GZIPInputStream} to read from GZIP data from the
93     * underlying stream. Set the internal buffer size to {@code size}.
94     *
95     * @param is
96     *            the {@code InputStream} to read data from.
97     * @param size
98     *            the internal read buffer size.
99     * @throws IOException
100     *             if an {@code IOException} occurs.
101     */
102    public GZIPInputStream(InputStream is, int size) throws IOException {
103        super(is, new Inflater(true), size);
104
105        try {
106            byte[] header = readHeader(is);
107            final short magic = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN);
108            if (magic != (short) GZIP_MAGIC) {
109                throw new IOException(String.format("unknown format (magic number %x)", magic));
110            }
111
112            parseGzipHeader(is, header, crc, buf);
113        } catch (IOException e) {
114            close(); // release the inflater
115            throw e;
116        }
117    }
118
119    /**
120     * Closes this stream and any underlying streams.
121     */
122    @Override
123    public void close() throws IOException {
124        eos = true;
125        super.close();
126    }
127
128    @Override
129    public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException {
130        if (closed) {
131            throw new IOException("Stream is closed");
132        }
133        if (eos) {
134            return -1;
135        }
136        Arrays.checkOffsetAndCount(buffer.length, byteOffset, byteCount);
137
138        int bytesRead;
139        try {
140            bytesRead = super.read(buffer, byteOffset, byteCount);
141        } finally {
142            eos = eof; // update eos after every read(), even when it throws
143        }
144
145        if (bytesRead != -1) {
146            crc.update(buffer, byteOffset, bytesRead);
147        }
148
149        if (eos) {
150            verifyCrc();
151            eos = maybeReadNextMember();
152            if (!eos) {
153                crc.reset();
154                inf.reset();
155                eof = false;
156                len = 0;
157            }
158        }
159
160        return bytesRead;
161    }
162
163    private boolean maybeReadNextMember() throws IOException {
164        // If we have any unconsumed data in the inflater buffer, we have to
165        // scan that first. The fact that we've reached here implies we've
166        // successfully consumed the GZIP trailer.
167        final int remaining = inf.getRemaining() - GZIP_TRAILER_SIZE;
168        if (remaining > 0) {
169            // NOTE: We make sure we create a pushback stream exactly once,
170            // even if the input stream contains multiple members.
171            //
172            // The push back stream we create must therefore be able to contain
173            // (worst case) the entire buffer even though there may be fewer bytes
174            // remaining when it is first created.
175            if (!(in instanceof PushbackInputStream)) {
176                in = new PushbackInputStream(in, buf.length);
177            }
178            ((PushbackInputStream) in).unread(buf,
179                    inf.getCurrentOffset() + GZIP_TRAILER_SIZE, remaining);
180        }
181
182        final byte[] buffer;
183        try {
184            buffer = readHeader(in);
185        } catch (EOFException eof) {
186            // We've reached the end of the stream and there are no more members
187            // to read. Note that we might also hit this if there are fewer than
188            // GZIP_HEADER_LENGTH bytes at the end of a member. We don't care
189            // because we're specified to ignore all data at the end of the last
190            // gzip record.
191            return true;
192        }
193
194        final short magic = Memory.peekShort(buffer, 0, ByteOrder.LITTLE_ENDIAN);
195        if (magic != (short) GZIP_MAGIC) {
196            // Don't throw here because we've already read one valid member
197            // from this stream.
198            return true;
199        }
200
201        // We've encountered the gzip magic number, so we assume there's another
202        // member in the stream.
203        parseGzipHeader(in, buffer, crc, buf);
204        return false;
205    }
206
207    private static byte[] readHeader(InputStream in) throws IOException {
208        byte[] header = new byte[10];
209        Streams.readFully(in, header, 0, header.length);
210        return header;
211    }
212
213    private static void parseGzipHeader(InputStream in, byte[] header,
214            CRC32 crc, byte[] scratch) throws IOException {
215        final byte flags = header[3];
216        final boolean hcrc = (flags & FHCRC) != 0;
217        if (hcrc) {
218            crc.update(header, 0, header.length);
219        }
220        if ((flags & FEXTRA) != 0) {
221            Streams.readFully(in, header, 0, 2);
222            if (hcrc) {
223                crc.update(header, 0, 2);
224            }
225            int length = Memory.peekShort(scratch, 0, ByteOrder.LITTLE_ENDIAN) & 0xffff;
226            while (length > 0) {
227                int max = length > scratch.length ? scratch.length : length;
228                int result = in.read(scratch, 0, max);
229                if (result == -1) {
230                    throw new EOFException();
231                }
232                if (hcrc) {
233                    crc.update(scratch, 0, result);
234                }
235                length -= result;
236            }
237        }
238        if ((flags & FNAME) != 0) {
239            readZeroTerminated(in, crc, hcrc);
240        }
241        if ((flags & FCOMMENT) != 0) {
242            readZeroTerminated(in, crc, hcrc);
243        }
244        if (hcrc) {
245            Streams.readFully(in, header, 0, 2);
246            short crc16 = Memory.peekShort(scratch, 0, ByteOrder.LITTLE_ENDIAN);
247            if ((short) crc.getValue() != crc16) {
248                throw new IOException("CRC mismatch");
249            }
250            crc.reset();
251        }
252    }
253
254    private void verifyCrc() throws IOException {
255        // Get non-compressed bytes read by fill
256        int size = inf.getRemaining();
257        final int trailerSize = 8; // crc (4 bytes) + total out (4 bytes)
258        byte[] b = new byte[trailerSize];
259        int copySize = (size > trailerSize) ? trailerSize : size;
260
261        System.arraycopy(buf, len - size, b, 0, copySize);
262        Streams.readFully(in, b, copySize, trailerSize - copySize);
263
264        if (Memory.peekInt(b, 0, ByteOrder.LITTLE_ENDIAN) != (int) crc.getValue()) {
265            throw new IOException("CRC mismatch");
266        }
267        if (Memory.peekInt(b, 4, ByteOrder.LITTLE_ENDIAN) != inf.getTotalOut()) {
268            throw new IOException("Size mismatch");
269        }
270    }
271
272    private static void readZeroTerminated(InputStream in, CRC32 crc, boolean hcrc)
273            throws IOException {
274        int result;
275        // TODO: Fix these single byte reads. This method is used to consume the
276        // header FNAME & FCOMMENT which aren't widely used in gzip files.
277        while ((result = in.read()) > 0) {
278            if (hcrc) {
279                crc.update(result);
280            }
281        }
282        if (result == -1) {
283            throw new EOFException();
284        }
285        // Add the zero
286        if (hcrc) {
287            crc.update(result);
288        }
289    }
290}
291