GZIPInputStream.java revision f934c3d2c8dd9e6bc5299cef41adace2a671637d
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.util.zip;
19
20import java.io.EOFException;
21import java.io.IOException;
22import java.io.InputStream;
23import java.nio.ByteOrder;
24import java.util.Arrays;
25import libcore.io.Memory;
26
27/**
28 * The {@code GZIPInputStream} class is used to read data stored in the GZIP
29 * format, reading and decompressing GZIP data from the underlying stream into
30 * its buffer.
31 *
32 * <h3>Example</h3>
33 * <p>Using {@code GZIPInputStream} is easier than {@link ZipInputStream}
34 * because GZIP is only for compression, and is not a container for multiple files.
35 * This code decompresses the data from a GZIP stream, similar to the {@code gunzip(1)} utility.
36 * <pre>
37 * InputStream is = ...
38 * GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(is));
39 * try {
40 *     // Reading from 'zis' gets you the uncompressed bytes...
41 *     processStream(zis);
42 * } finally {
43 *     zis.close();
44 * }
45 * </pre>
46 */
47public class GZIPInputStream extends InflaterInputStream {
48    private static final int FCOMMENT = 16;
49
50    private static final int FEXTRA = 4;
51
52    private static final int FHCRC = 2;
53
54    private static final int FNAME = 8;
55
56    /**
57     * The magic header for the GZIP format.
58     */
59    public static final int GZIP_MAGIC = 0x8b1f;
60
61    /**
62     * The checksum algorithm used when handling uncompressed data.
63     */
64    protected CRC32 crc = new CRC32();
65
66    /**
67     * Indicates the end of the input stream.
68     */
69    protected boolean eos = false;
70
71    /**
72     * Construct a {@code GZIPInputStream} to read from GZIP data from the
73     * underlying stream.
74     *
75     * @param is
76     *            the {@code InputStream} to read data from.
77     * @throws IOException
78     *             if an {@code IOException} occurs.
79     */
80    public GZIPInputStream(InputStream is) throws IOException {
81        this(is, BUF_SIZE);
82    }
83
84    /**
85     * Construct a {@code GZIPInputStream} to read from GZIP data from the
86     * underlying stream. Set the internal buffer size to {@code size}.
87     *
88     * @param is
89     *            the {@code InputStream} to read data from.
90     * @param size
91     *            the internal read buffer size.
92     * @throws IOException
93     *             if an {@code IOException} occurs.
94     */
95    public GZIPInputStream(InputStream is, int size) throws IOException {
96        super(is, new Inflater(true), size);
97        byte[] header = new byte[10];
98        readFully(header, 0, header.length);
99        short magic = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN);
100        if (magic != (short) GZIP_MAGIC) {
101            throw new IOException(String.format("unknown format (magic number %x)", magic));
102        }
103        int flags = header[3];
104        boolean hcrc = (flags & FHCRC) != 0;
105        if (hcrc) {
106            crc.update(header, 0, header.length);
107        }
108        if ((flags & FEXTRA) != 0) {
109            readFully(header, 0, 2);
110            if (hcrc) {
111                crc.update(header, 0, 2);
112            }
113            int length = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN) & 0xffff;
114            while (length > 0) {
115                int max = length > buf.length ? buf.length : length;
116                int result = in.read(buf, 0, max);
117                if (result == -1) {
118                    throw new EOFException();
119                }
120                if (hcrc) {
121                    crc.update(buf, 0, result);
122                }
123                length -= result;
124            }
125        }
126        if ((flags & FNAME) != 0) {
127            readZeroTerminated(hcrc);
128        }
129        if ((flags & FCOMMENT) != 0) {
130            readZeroTerminated(hcrc);
131        }
132        if (hcrc) {
133            readFully(header, 0, 2);
134            short crc16 = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN);
135            if ((short) crc.getValue() != crc16) {
136                throw new IOException("CRC mismatch");
137            }
138            crc.reset();
139        }
140    }
141
142    /**
143     * Closes this stream and any underlying streams.
144     */
145    @Override
146    public void close() throws IOException {
147        eos = true;
148        super.close();
149    }
150
151    /**
152     * Reads and decompresses GZIP data from the underlying stream into the
153     * given buffer.
154     */
155    @Override
156    public int read(byte[] buffer, int offset, int byteCount) throws IOException {
157        if (closed) {
158            throw new IOException("Stream is closed");
159        }
160        if (eos) {
161            return -1;
162        }
163        Arrays.checkOffsetAndCount(buffer.length, offset, byteCount);
164
165        int bytesRead;
166        try {
167            bytesRead = super.read(buffer, offset, byteCount);
168        } finally {
169            eos = eof; // update eos after every read(), even when it throws
170        }
171
172        if (bytesRead != -1) {
173            crc.update(buffer, offset, bytesRead);
174        }
175
176        if (eos) {
177            verifyCrc();
178        }
179
180        return bytesRead;
181    }
182
183    private void verifyCrc() throws IOException {
184        // Get non-compressed bytes read by fill
185        int size = inf.getRemaining();
186        final int trailerSize = 8; // crc (4 bytes) + total out (4 bytes)
187        byte[] b = new byte[trailerSize];
188        int copySize = (size > trailerSize) ? trailerSize : size;
189
190        System.arraycopy(buf, len - size, b, 0, copySize);
191        readFully(b, copySize, trailerSize - copySize);
192
193        if (Memory.peekInt(b, 0, ByteOrder.LITTLE_ENDIAN) != (int) crc.getValue()) {
194            throw new IOException("CRC mismatch");
195        }
196        if (Memory.peekInt(b, 4, ByteOrder.LITTLE_ENDIAN) != inf.getTotalOut()) {
197            throw new IOException("Size mismatch");
198        }
199    }
200
201    private void readFully(byte[] buffer, int offset, int length) throws IOException {
202        int result;
203        while (length > 0) {
204            result = in.read(buffer, offset, length);
205            if (result == -1) {
206                throw new EOFException();
207            }
208            offset += result;
209            length -= result;
210        }
211    }
212
213    private void readZeroTerminated(boolean hcrc) throws IOException {
214        int result;
215        while ((result = in.read()) > 0) {
216            if (hcrc) {
217                crc.update(result);
218            }
219        }
220        if (result == -1) {
221            throw new EOFException();
222        }
223        // Add the zero
224        if (hcrc) {
225            crc.update(result);
226        }
227    }
228}
229