ZipInputStream.java revision 51b1b6997fd3f980076b8081f7f1165ccc2a4008
/*
 * Copyright (c) 1996, 2009, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
25
26package java.util.zip;
27
28import java.io.InputStream;
29import java.io.IOException;
30import java.io.EOFException;
31import java.io.PushbackInputStream;
32import java.nio.charset.Charset;
33import java.nio.charset.StandardCharsets;
34import static java.util.zip.ZipConstants64.*;
35
36/**
37 * This class implements an input stream filter for reading files in the
38 * ZIP file format. Includes support for both compressed and uncompressed
39 * entries.
40 *
41 * @author      David Connelly
42 */
43public
44class ZipInputStream extends InflaterInputStream implements ZipConstants {
45    private ZipEntry entry;
46    private int flag;
47    private CRC32 crc = new CRC32();
48    private long remaining;
49    private byte[] tmpbuf = new byte[512];
50
51    private static final int STORED = ZipEntry.STORED;
52    private static final int DEFLATED = ZipEntry.DEFLATED;
53
54    private boolean closed = false;
55    // this flag is set to true after EOF has reached for
56    // one entry
57    private boolean entryEOF = false;
58
59    private ZipCoder zc;
60
61    /**
62     * Check to make sure that this stream has not been closed
63     */
64    private void ensureOpen() throws IOException {
65        if (closed) {
66            throw new IOException("Stream closed");
67        }
68    }
69
70    /**
71     * Creates a new ZIP input stream.
72     *
73     * <p>The UTF-8 {@link java.nio.charset.Charset charset} is used to
74     * decode the entry names.
75     *
76     * @param in the actual input stream
77     */
78    public ZipInputStream(InputStream in) {
79        this(in, StandardCharsets.UTF_8);
80    }
81
82    /**
83     * Creates a new ZIP input stream.
84     *
85     * @param in the actual input stream
86     *
87     * @param charset
88     *        The {@linkplain java.nio.charset.Charset charset} to be
89     *        used to decode the ZIP entry name (ignored if the
90     *        <a href="package-summary.html#lang_encoding"> language
91     *        encoding bit</a> of the ZIP entry's general purpose bit
92     *        flag is set).
93     *
94     * @since 1.7
95     */
96    public ZipInputStream(InputStream in, Charset charset) {
97        super(new PushbackInputStream(in, 512), new Inflater(true), 512);
98        usesDefaultInflater = true;
99        if(in == null) {
100            throw new NullPointerException("in is null");
101        }
102        if (charset == null)
103            throw new NullPointerException("charset is null");
104        this.zc = ZipCoder.get(charset);
105    }
106
107    /**
108     * Reads the next ZIP file entry and positions the stream at the
109     * beginning of the entry data.
110     * @return the next ZIP file entry, or null if there are no more entries
111     * @exception ZipException if a ZIP file error has occurred
112     * @exception IOException if an I/O error has occurred
113     */
114    public ZipEntry getNextEntry() throws IOException {
115        ensureOpen();
116        if (entry != null) {
117            closeEntry();
118        }
119        crc.reset();
120        inf.reset();
121        if ((entry = readLOC()) == null) {
122            return null;
123        }
124        if (entry.method == STORED) {
125            remaining = entry.size;
126        }
127        entryEOF = false;
128        return entry;
129    }
130
131    /**
132     * Closes the current ZIP entry and positions the stream for reading the
133     * next entry.
134     * @exception ZipException if a ZIP file error has occurred
135     * @exception IOException if an I/O error has occurred
136     */
137    public void closeEntry() throws IOException {
138        ensureOpen();
139        while (read(tmpbuf, 0, tmpbuf.length) != -1) ;
140        entryEOF = true;
141    }
142
143    /**
144     * Returns 0 after EOF has reached for the current entry data,
145     * otherwise always return 1.
146     * <p>
147     * Programs should not count on this method to return the actual number
148     * of bytes that could be read without blocking.
149     *
150     * @return     1 before EOF and 0 after EOF has reached for current entry.
151     * @exception  IOException  if an I/O error occurs.
152     *
153     */
154    public int available() throws IOException {
155        ensureOpen();
156        if (entryEOF) {
157            return 0;
158        } else {
159            return 1;
160        }
161    }
162
163    /**
164     * Reads from the current ZIP entry into an array of bytes.
165     * If <code>len</code> is not zero, the method
166     * blocks until some input is available; otherwise, no
167     * bytes are read and <code>0</code> is returned.
168     * @param b the buffer into which the data is read
169     * @param off the start offset in the destination array <code>b</code>
170     * @param len the maximum number of bytes read
171     * @return the actual number of bytes read, or -1 if the end of the
172     *         entry is reached
173     * @exception  NullPointerException if <code>b</code> is <code>null</code>.
174     * @exception  IndexOutOfBoundsException if <code>off</code> is negative,
175     * <code>len</code> is negative, or <code>len</code> is greater than
176     * <code>b.length - off</code>
177     * @exception ZipException if a ZIP file error has occurred
178     * @exception IOException if an I/O error has occurred
179     */
180    public int read(byte[] b, int off, int len) throws IOException {
181        ensureOpen();
182        if (off < 0 || len < 0 || off > b.length - len) {
183            throw new IndexOutOfBoundsException();
184        } else if (len == 0) {
185            return 0;
186        }
187
188        if (entry == null) {
189            return -1;
190        }
191        switch (entry.method) {
192        case DEFLATED:
193            len = super.read(b, off, len);
194            if (len == -1) {
195                readEnd(entry);
196                entryEOF = true;
197                entry = null;
198            } else {
199                crc.update(b, off, len);
200            }
201            return len;
202        case STORED:
203            if (remaining <= 0) {
204                entryEOF = true;
205                entry = null;
206                return -1;
207            }
208            if (len > remaining) {
209                len = (int)remaining;
210            }
211            len = in.read(b, off, len);
212            if (len == -1) {
213                throw new ZipException("unexpected EOF");
214            }
215            crc.update(b, off, len);
216            remaining -= len;
217            if (remaining == 0 && entry.crc != crc.getValue()) {
218                throw new ZipException(
219                    "invalid entry CRC (expected 0x" + Long.toHexString(entry.crc) +
220                    " but got 0x" + Long.toHexString(crc.getValue()) + ")");
221            }
222            return len;
223        default:
224            throw new ZipException("invalid compression method");
225        }
226    }
227
228    /**
229     * Skips specified number of bytes in the current ZIP entry.
230     * @param n the number of bytes to skip
231     * @return the actual number of bytes skipped
232     * @exception ZipException if a ZIP file error has occurred
233     * @exception IOException if an I/O error has occurred
234     * @exception IllegalArgumentException if n < 0
235     */
236    public long skip(long n) throws IOException {
237        if (n < 0) {
238            throw new IllegalArgumentException("negative skip length");
239        }
240        ensureOpen();
241        int max = (int)Math.min(n, Integer.MAX_VALUE);
242        int total = 0;
243        while (total < max) {
244            int len = max - total;
245            if (len > tmpbuf.length) {
246                len = tmpbuf.length;
247            }
248            len = read(tmpbuf, 0, len);
249            if (len == -1) {
250                entryEOF = true;
251                break;
252            }
253            total += len;
254        }
255        return total;
256    }
257
258    /**
259     * Closes this input stream and releases any system resources associated
260     * with the stream.
261     * @exception IOException if an I/O error has occurred
262     */
263    public void close() throws IOException {
264        if (!closed) {
265            super.close();
266            closed = true;
267        }
268    }
269
270    private byte[] b = new byte[256];
271
272    /*
273     * Reads local file (LOC) header for next entry.
274     */
275    private ZipEntry readLOC() throws IOException {
276        try {
277            readFully(tmpbuf, 0, LOCHDR);
278        } catch (EOFException e) {
279            return null;
280        }
281        if (get32(tmpbuf, 0) != LOCSIG) {
282            return null;
283        }
284        // get flag first, we need check EFS.
285        flag = get16(tmpbuf, LOCFLG);
286        // get the entry name and create the ZipEntry first
287        int len = get16(tmpbuf, LOCNAM);
288        int blen = b.length;
289        if (len > blen) {
290            do
291                blen = blen * 2;
292            while (len > blen);
293            b = new byte[blen];
294        }
295        readFully(b, 0, len);
296        // Force to use UTF-8 if the EFS bit is ON, even the cs is NOT UTF-8
297        ZipEntry e = createZipEntry(((flag & EFS) != 0)
298                                    ? zc.toStringUTF8(b, len)
299                                    : zc.toString(b, len));
300        // now get the remaining fields for the entry
301        if ((flag & 1) == 1) {
302            throw new ZipException("encrypted ZIP entry not supported");
303        }
304        e.method = get16(tmpbuf, LOCHOW);
305        e.time = get32(tmpbuf, LOCTIM);
306        if ((flag & 8) == 8) {
307            /* "Data Descriptor" present */
308            if (e.method != DEFLATED) {
309                throw new ZipException(
310                        "only DEFLATED entries can have EXT descriptor");
311            }
312        } else {
313            e.crc = get32(tmpbuf, LOCCRC);
314            e.csize = get32(tmpbuf, LOCSIZ);
315            e.size = get32(tmpbuf, LOCLEN);
316        }
317        len = get16(tmpbuf, LOCEXT);
318        if (len > 0) {
319            byte[] bb = new byte[len];
320            readFully(bb, 0, len);
321            e.setExtra(bb);
322            // extra fields are in "HeaderID(2)DataSize(2)Data... format
323            if (e.csize == ZIP64_MAGICVAL || e.size == ZIP64_MAGICVAL) {
324                int off = 0;
325                while (off + 4 < len) {
326                    int sz = get16(bb, off + 2);
327                    if (get16(bb, off) == ZIP64_EXTID) {
328                        off += 4;
329                        // LOC extra zip64 entry MUST include BOTH original and
330                        // compressed file size fields
331                        if (sz < 16 || (off + sz) > len ) {
332                            // Invalid zip64 extra fields, simply skip. Even it's
333                            // rare, it's possible the entry size happens to be
334                            // the magic value and it "accidnetly" has some bytes
335                            // in extra match the id.
336                            return e;
337                        }
338                        e.size = get64(bb, off);
339                        e.csize = get64(bb, off + 8);
340                        break;
341                    }
342                    off += (sz + 4);
343                }
344            }
345        }
346        return e;
347    }
348
349    /**
350     * Creates a new <code>ZipEntry</code> object for the specified
351     * entry name.
352     *
353     * @param name the ZIP file entry name
354     * @return the ZipEntry just created
355     */
356    protected ZipEntry createZipEntry(String name) {
357        return new ZipEntry(name);
358    }
359
360    /*
361     * Reads end of deflated entry as well as EXT descriptor if present.
362     */
363    private void readEnd(ZipEntry e) throws IOException {
364        int n = inf.getRemaining();
365        if (n > 0) {
366            ((PushbackInputStream)in).unread(buf, len - n, n);
367        }
368        if ((flag & 8) == 8) {
369            /* "Data Descriptor" present */
370            if (inf.getBytesWritten() > ZIP64_MAGICVAL ||
371                inf.getBytesRead() > ZIP64_MAGICVAL) {
372                // ZIP64 format
373                readFully(tmpbuf, 0, ZIP64_EXTHDR);
374                long sig = get32(tmpbuf, 0);
375                if (sig != EXTSIG) { // no EXTSIG present
376                    e.crc = sig;
377                    e.csize = get64(tmpbuf, ZIP64_EXTSIZ - ZIP64_EXTCRC);
378                    e.size = get64(tmpbuf, ZIP64_EXTLEN - ZIP64_EXTCRC);
379                    ((PushbackInputStream)in).unread(
380                        tmpbuf, ZIP64_EXTHDR - ZIP64_EXTCRC - 1, ZIP64_EXTCRC);
381                } else {
382                    e.crc = get32(tmpbuf, ZIP64_EXTCRC);
383                    e.csize = get64(tmpbuf, ZIP64_EXTSIZ);
384                    e.size = get64(tmpbuf, ZIP64_EXTLEN);
385                }
386            } else {
387                readFully(tmpbuf, 0, EXTHDR);
388                long sig = get32(tmpbuf, 0);
389                if (sig != EXTSIG) { // no EXTSIG present
390                    e.crc = sig;
391                    e.csize = get32(tmpbuf, EXTSIZ - EXTCRC);
392                    e.size = get32(tmpbuf, EXTLEN - EXTCRC);
393                    ((PushbackInputStream)in).unread(
394                                               tmpbuf, EXTHDR - EXTCRC - 1, EXTCRC);
395                } else {
396                    e.crc = get32(tmpbuf, EXTCRC);
397                    e.csize = get32(tmpbuf, EXTSIZ);
398                    e.size = get32(tmpbuf, EXTLEN);
399                }
400            }
401        }
402        if (e.size != inf.getBytesWritten()) {
403            throw new ZipException(
404                "invalid entry size (expected " + e.size +
405                " but got " + inf.getBytesWritten() + " bytes)");
406        }
407        if (e.csize != inf.getBytesRead()) {
408            throw new ZipException(
409                "invalid entry compressed size (expected " + e.csize +
410                " but got " + inf.getBytesRead() + " bytes)");
411        }
412        if (e.crc != crc.getValue()) {
413            throw new ZipException(
414                "invalid entry CRC (expected 0x" + Long.toHexString(e.crc) +
415                " but got 0x" + Long.toHexString(crc.getValue()) + ")");
416        }
417    }
418
419    /*
420     * Reads bytes, blocking until all bytes are read.
421     */
422    private void readFully(byte[] b, int off, int len) throws IOException {
423        while (len > 0) {
424            int n = in.read(b, off, len);
425            if (n == -1) {
426                throw new EOFException();
427            }
428            off += n;
429            len -= n;
430        }
431    }
432
433    /*
434     * Fetches unsigned 16-bit value from byte array at specified offset.
435     * The bytes are assumed to be in Intel (little-endian) byte order.
436     */
437    private static final int get16(byte b[], int off) {
438        return (b[off] & 0xff) | ((b[off+1] & 0xff) << 8);
439    }
440
441    /*
442     * Fetches unsigned 32-bit value from byte array at specified offset.
443     * The bytes are assumed to be in Intel (little-endian) byte order.
444     */
445    private static final long get32(byte b[], int off) {
446        return (get16(b, off) | ((long)get16(b, off+2) << 16)) & 0xffffffffL;
447    }
448
449    /*
450     * Fetches signed 64-bit value from byte array at specified offset.
451     * The bytes are assumed to be in Intel (little-endian) byte order.
452     */
453    private static final long get64(byte b[], int off) {
454        return get32(b, off) | (get32(b, off+4) << 32);
455    }
456}
457