GZIPInputStream.java revision bb65253db600d05ca6eeedc6d076f6af20680f99
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one or more 3 * contributor license agreements. See the NOTICE file distributed with 4 * this work for additional information regarding copyright ownership. 5 * The ASF licenses this file to You under the Apache License, Version 2.0 6 * (the "License"); you may not use this file except in compliance with 7 * the License. You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18package java.util.zip; 19 20import java.io.EOFException; 21import java.io.IOException; 22import java.io.InputStream; 23import java.io.PushbackInputStream; 24import java.nio.ByteOrder; 25import java.util.Arrays; 26import libcore.io.Memory; 27import libcore.io.Streams; 28 29/** 30 * The {@code GZIPInputStream} class is used to read data stored in the GZIP 31 * format, reading and decompressing GZIP data from the underlying stream into 32 * its buffer. 33 * 34 * <h3>Example</h3> 35 * <p>Using {@code GZIPInputStream} is easier than {@link ZipInputStream} 36 * because GZIP is only for compression, and is not a container for multiple files. 37 * This code decompresses the data from a GZIP stream, similar to the {@code gunzip(1)} utility. 38 * <pre> 39 * InputStream is = ... 40 * GZIPInputStream zis = new GZIPInputStream(new BufferedInputStream(is)); 41 * try { 42 * // Reading from 'zis' gets you the uncompressed bytes... 43 * processStream(zis); 44 * } finally { 45 * zis.close(); 46 * } 47 * </pre> 48 * 49 * <p>Note that this class ignores all remaining data at the end of the last 50 * GZIP member. 51 */ 52public class GZIPInputStream extends InflaterInputStream { 53 private static final int FCOMMENT = 16; 54 55 private static final int FEXTRA = 4; 56 57 private static final int FHCRC = 2; 58 59 private static final int FNAME = 8; 60 61 private static final int GZIP_TRAILER_SIZE = 8; 62 63 /** 64 * The magic header for the GZIP format. 65 */ 66 public static final int GZIP_MAGIC = 0x8b1f; 67 68 /** 69 * The checksum algorithm used when handling uncompressed data. 70 */ 71 protected CRC32 crc = new CRC32(); 72 73 /** 74 * Indicates the end of the input stream. 75 */ 76 protected boolean eos = false; 77 78 /** 79 * Construct a {@code GZIPInputStream} to read from GZIP data from the 80 * underlying stream. 81 * 82 * @param is 83 * the {@code InputStream} to read data from. 84 * @throws IOException 85 * if an {@code IOException} occurs. 86 */ 87 public GZIPInputStream(InputStream is) throws IOException { 88 this(is, BUF_SIZE); 89 } 90 91 /** 92 * Construct a {@code GZIPInputStream} to read from GZIP data from the 93 * underlying stream. Set the internal buffer size to {@code size}. 94 * 95 * @param is 96 * the {@code InputStream} to read data from. 97 * @param size 98 * the internal read buffer size. 99 * @throws IOException 100 * if an {@code IOException} occurs. 101 */ 102 public GZIPInputStream(InputStream is, int size) throws IOException { 103 super(is, new Inflater(true), size); 104 105 try { 106 byte[] header = readHeader(is); 107 final short magic = Memory.peekShort(header, 0, ByteOrder.LITTLE_ENDIAN); 108 if (magic != (short) GZIP_MAGIC) { 109 throw new IOException(String.format("unknown format (magic number %x)", magic)); 110 } 111 112 parseGzipHeader(is, header, crc, buf); 113 } catch (IOException e) { 114 close(); // release the inflater 115 throw e; 116 } 117 } 118 119 /** 120 * Closes this stream and any underlying streams. 121 */ 122 @Override 123 public void close() throws IOException { 124 eos = true; 125 super.close(); 126 } 127 128 @Override 129 public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException { 130 if (closed) { 131 throw new IOException("Stream is closed"); 132 } 133 if (eos) { 134 return -1; 135 } 136 Arrays.checkOffsetAndCount(buffer.length, byteOffset, byteCount); 137 138 int bytesRead; 139 try { 140 bytesRead = super.read(buffer, byteOffset, byteCount); 141 } finally { 142 eos = eof; // update eos after every read(), even when it throws 143 } 144 145 if (bytesRead != -1) { 146 crc.update(buffer, byteOffset, bytesRead); 147 } 148 149 if (eos) { 150 verifyCrc(); 151 eos = maybeReadNextMember(); 152 if (!eos) { 153 crc.reset(); 154 inf.reset(); 155 eof = false; 156 len = 0; 157 } 158 } 159 160 return bytesRead; 161 } 162 163 private boolean maybeReadNextMember() throws IOException { 164 // If we have any unconsumed data in the inflater buffer, we have to 165 // scan that first. The fact that we've reached here implies we've 166 // successfully consumed the GZIP trailer. 167 final int remaining = inf.getRemaining() - GZIP_TRAILER_SIZE; 168 if (remaining > 0) { 169 // NOTE: We make sure we create a pushback stream exactly once, 170 // even if the input stream contains multiple members. 171 // 172 // The push back stream we create must therefore be able to contain 173 // (worst case) the entire buffer even though there may be fewer bytes 174 // remaining when it is first created. 175 if (!(in instanceof PushbackInputStream)) { 176 in = new PushbackInputStream(in, buf.length); 177 } 178 ((PushbackInputStream) in).unread(buf, 179 inf.getCurrentOffset() + GZIP_TRAILER_SIZE, remaining); 180 } 181 182 final byte[] buffer; 183 try { 184 buffer = readHeader(in); 185 } catch (EOFException eof) { 186 // We've reached the end of the stream and there are no more members 187 // to read. Note that we might also hit this if there are fewer than 188 // GZIP_HEADER_LENGTH bytes at the end of a member. We don't care 189 // because we're specified to ignore all data at the end of the last 190 // gzip record. 191 return true; 192 } 193 194 final short magic = Memory.peekShort(buffer, 0, ByteOrder.LITTLE_ENDIAN); 195 if (magic != (short) GZIP_MAGIC) { 196 // Don't throw here because we've already read one valid member 197 // from this stream. 198 return true; 199 } 200 201 // We've encountered the gzip magic number, so we assume there's another 202 // member in the stream. 203 parseGzipHeader(in, buffer, crc, buf); 204 return false; 205 } 206 207 private static byte[] readHeader(InputStream in) throws IOException { 208 byte[] header = new byte[10]; 209 Streams.readFully(in, header, 0, header.length); 210 return header; 211 } 212 213 private static void parseGzipHeader(InputStream in, byte[] header, 214 CRC32 crc, byte[] scratch) throws IOException { 215 final byte flags = header[3]; 216 final boolean hcrc = (flags & FHCRC) != 0; 217 if (hcrc) { 218 crc.update(header, 0, header.length); 219 } 220 if ((flags & FEXTRA) != 0) { 221 Streams.readFully(in, header, 0, 2); 222 if (hcrc) { 223 crc.update(header, 0, 2); 224 } 225 int length = Memory.peekShort(scratch, 0, ByteOrder.LITTLE_ENDIAN) & 0xffff; 226 while (length > 0) { 227 int max = length > scratch.length ? scratch.length : length; 228 int result = in.read(scratch, 0, max); 229 if (result == -1) { 230 throw new EOFException(); 231 } 232 if (hcrc) { 233 crc.update(scratch, 0, result); 234 } 235 length -= result; 236 } 237 } 238 if ((flags & FNAME) != 0) { 239 readZeroTerminated(in, crc, hcrc); 240 } 241 if ((flags & FCOMMENT) != 0) { 242 readZeroTerminated(in, crc, hcrc); 243 } 244 if (hcrc) { 245 Streams.readFully(in, header, 0, 2); 246 short crc16 = Memory.peekShort(scratch, 0, ByteOrder.LITTLE_ENDIAN); 247 if ((short) crc.getValue() != crc16) { 248 throw new IOException("CRC mismatch"); 249 } 250 crc.reset(); 251 } 252 } 253 254 private void verifyCrc() throws IOException { 255 // Get non-compressed bytes read by fill 256 int size = inf.getRemaining(); 257 final int trailerSize = 8; // crc (4 bytes) + total out (4 bytes) 258 byte[] b = new byte[trailerSize]; 259 int copySize = (size > trailerSize) ? trailerSize : size; 260 261 System.arraycopy(buf, len - size, b, 0, copySize); 262 Streams.readFully(in, b, copySize, trailerSize - copySize); 263 264 if (Memory.peekInt(b, 0, ByteOrder.LITTLE_ENDIAN) != (int) crc.getValue()) { 265 throw new IOException("CRC mismatch"); 266 } 267 if (Memory.peekInt(b, 4, ByteOrder.LITTLE_ENDIAN) != inf.getTotalOut()) { 268 throw new IOException("Size mismatch"); 269 } 270 } 271 272 private static void readZeroTerminated(InputStream in, CRC32 crc, boolean hcrc) 273 throws IOException { 274 int result; 275 // TODO: Fix these single byte reads. This method is used to consume the 276 // header FNAME & FCOMMENT which aren't widely used in gzip files. 277 while ((result = in.read()) > 0) { 278 if (hcrc) { 279 crc.update(result); 280 } 281 } 282 if (result == -1) { 283 throw new EOFException(); 284 } 285 // Add the zero 286 if (hcrc) { 287 crc.update(result); 288 } 289 } 290} 291