1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *     http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18package java.util.zip;
19
20import dalvik.system.CloseGuard;
21import java.io.BufferedInputStream;
22import java.io.Closeable;
23import java.io.DataInputStream;
24import java.io.File;
25import java.io.IOException;
26import java.io.InputStream;
27import java.io.RandomAccessFile;
28import java.nio.ByteOrder;
29import java.nio.charset.StandardCharsets;
30import java.util.Enumeration;
31import java.util.Iterator;
32import java.util.LinkedHashMap;
33import libcore.io.BufferIterator;
34import libcore.io.HeapBufferIterator;
35import libcore.io.IoUtils;
36import libcore.io.Streams;
37
38/**
39 * This class provides random read access to a zip file. You pay more to read
40 * the zip file's central directory up front (from the constructor), but if you're using
41 * {@link #getEntry} to look up multiple files by name, you get the benefit of this index.
42 *
 * <p>If you only want to iterate through all the files (using {@link #entries()}), you should
44 * consider {@link ZipInputStream}, which provides stream-like read access to a zip file and
45 * has a lower up-front cost because you don't pay to build an in-memory index.
46 *
47 * <p>If you want to create a zip file, use {@link ZipOutputStream}. There is no API for updating
48 * an existing zip file.
49 */
50public class ZipFile implements Closeable, ZipConstants {
51    /**
52     * General Purpose Bit Flags, Bit 0.
53     * If set, indicates that the file is encrypted.
54     */
55    static final int GPBF_ENCRYPTED_FLAG = 1 << 0;
56
57    /**
58     * General Purpose Bit Flags, Bit 3.
59     * If this bit is set, the fields crc-32, compressed
60     * size and uncompressed size are set to zero in the
61     * local header.  The correct values are put in the
62     * data descriptor immediately following the compressed
63     * data.  (Note: PKZIP version 2.04g for DOS only
64     * recognizes this bit for method 8 compression, newer
65     * versions of PKZIP recognize this bit for any
66     * compression method.)
67     */
68    static final int GPBF_DATA_DESCRIPTOR_FLAG = 1 << 3;
69
70    /**
71     * General Purpose Bit Flags, Bit 11.
72     * Language encoding flag (EFS).  If this bit is set,
73     * the filename and comment fields for this file
74     * must be encoded using UTF-8.
75     */
76    static final int GPBF_UTF8_FLAG = 1 << 11;
77
78    /**
     * Unsupported General Purpose Bit Flags Mask.
     * Bit mask of the flag bits that are not supported.
81     * Note: The only bit that we will enforce at this time
82     * is the encrypted bit. Although other bits are not supported,
83     * we must not enforce them as this could break some legitimate
84     * use cases (See http://b/8617715).
85     */
86    static final int GPBF_UNSUPPORTED_MASK = GPBF_ENCRYPTED_FLAG;
87
88    /**
89     * Open zip file for reading.
90     */
91    public static final int OPEN_READ = 1;
92
93    /**
94     * Delete zip file when closed.
95     */
96    public static final int OPEN_DELETE = 4;
97
98    private final String filename;
99
100    private File fileToDeleteOnClose;
101
102    private RandomAccessFile raf;
103
104    private final LinkedHashMap<String, ZipEntry> entries = new LinkedHashMap<String, ZipEntry>();
105
106    private String comment;
107
108    private final CloseGuard guard = CloseGuard.get();
109
    /**
     * Constructs a new {@code ZipFile} allowing read access to the contents of the given file.
     *
     * <p>This is equivalent to calling {@link #ZipFile(File, int)} with {@link #OPEN_READ}.
     *
     * <p>UTF-8 is used to decode all comments and entry names in the file.
     *
     * @param file the zip file to open for reading.
     * @throws ZipException if a zip error occurs.
     * @throws IOException if an {@code IOException} occurs.
     */
    public ZipFile(File file) throws ZipException, IOException {
        this(file, OPEN_READ);
    }
121
    /**
     * Constructs a new {@code ZipFile} allowing read access to the contents of the given file.
     *
     * <p>The name is wrapped in a {@link File} and opened with {@link #OPEN_READ}.
     *
     * <p>UTF-8 is used to decode all comments and entry names in the file.
     *
     * @param name the path of the zip file to open for reading.
     * @throws IOException if an IOException occurs.
     */
    public ZipFile(String name) throws IOException {
        this(new File(name), OPEN_READ);
    }
132
133    /**
134     * Constructs a new {@code ZipFile} allowing access to the given file.
135     *
136     * <p>UTF-8 is used to decode all comments and entry names in the file.
137     *
138     * <p>The {@code mode} must be either {@code OPEN_READ} or {@code OPEN_READ|OPEN_DELETE}.
139     * If the {@code OPEN_DELETE} flag is supplied, the file will be deleted at or before the
140     * time that the {@code ZipFile} is closed (the contents will remain accessible until
141     * this {@code ZipFile} is closed); it also calls {@code File.deleteOnExit}.
142     *
143     * @throws IOException if an {@code IOException} occurs.
144     */
145    public ZipFile(File file, int mode) throws IOException {
146        filename = file.getPath();
147        if (mode != OPEN_READ && mode != (OPEN_READ | OPEN_DELETE)) {
148            throw new IllegalArgumentException("Bad mode: " + mode);
149        }
150
151        if ((mode & OPEN_DELETE) != 0) {
152            fileToDeleteOnClose = file;
153            fileToDeleteOnClose.deleteOnExit();
154        } else {
155            fileToDeleteOnClose = null;
156        }
157
158        raf = new RandomAccessFile(filename, "r");
159
160        // Make sure to close the RandomAccessFile if reading the central directory fails.
161        boolean mustCloseFile = true;
162        try {
163            readCentralDir();
164
165            // Read succeeded so do not close the underlying RandomAccessFile.
166            mustCloseFile = false;
167        } finally {
168            if (mustCloseFile) {
169                IoUtils.closeQuietly(raf);
170            }
171        }
172
173        guard.open("close");
174    }
175
    /**
     * Calls {@code warnIfOpen()} on the {@link CloseGuard} (presumably reporting a leak if
     * {@link #close} was never called -- confirm against {@code CloseGuard}) and then chains
     * to {@code super.finalize()}.
     *
     * <p>Note that this method does not close the underlying file itself.
     *
     * @throws IOException declared for subclasses; nothing in this body throws it directly.
     */
    @Override protected void finalize() throws IOException {
        try {
            // NOTE(review): guard is a final field with an initializer; the null check
            // presumably covers an instance whose field initialization did not complete.
            if (guard != null) {
                guard.warnIfOpen();
            }
        } finally {
            try {
                super.finalize();
            } catch (Throwable t) {
                // Surface any failure of the superclass finalizer as an AssertionError.
                throw new AssertionError(t);
            }
        }
    }
189
190    /**
191     * Closes this zip file. This method is idempotent. This method may cause I/O if the
192     * zip file needs to be deleted.
193     *
194     * @throws IOException
195     *             if an IOException occurs.
196     */
197    public void close() throws IOException {
198        guard.close();
199
200        RandomAccessFile localRaf = raf;
201        if (localRaf != null) { // Only close initialized instances
202            synchronized (localRaf) {
203                raf = null;
204                localRaf.close();
205            }
206            if (fileToDeleteOnClose != null) {
207                fileToDeleteOnClose.delete();
208                fileToDeleteOnClose = null;
209            }
210        }
211    }
212
213    private void checkNotClosed() {
214        if (raf == null) {
215            throw new IllegalStateException("Zip file closed");
216        }
217    }
218
219    /**
220     * Returns an enumeration of the entries. The entries are listed in the
221     * order in which they appear in the zip file.
222     *
223     * <p>If you only need to iterate over the entries in a zip file, and don't
224     * need random-access entry lookup by name, you should probably use {@link ZipInputStream}
225     * instead, to avoid paying to construct the in-memory index.
226     *
227     * @throws IllegalStateException if this zip file has been closed.
228     */
229    public Enumeration<? extends ZipEntry> entries() {
230        checkNotClosed();
231        final Iterator<ZipEntry> iterator = entries.values().iterator();
232
233        return new Enumeration<ZipEntry>() {
234            public boolean hasMoreElements() {
235                checkNotClosed();
236                return iterator.hasNext();
237            }
238
239            public ZipEntry nextElement() {
240                checkNotClosed();
241                return iterator.next();
242            }
243        };
244    }
245
    /**
     * Returns this file's comment, or null if it doesn't have one.
     * See {@link ZipOutputStream#setComment}.
     *
     * <p>The comment is read once, while the central directory is parsed during
     * construction, and is decoded as UTF-8. A zero-length comment is reported as null.
     *
     * @return the archive comment, or null.
     * @throws IllegalStateException if this zip file has been closed.
     * @since 1.7
     */
    public String getComment() {
        checkNotClosed();
        return comment;
    }
257
258    /**
259     * Returns the zip entry with the given name, or null if there is no such entry.
260     *
261     * @throws IllegalStateException if this zip file has been closed.
262     */
263    public ZipEntry getEntry(String entryName) {
264        checkNotClosed();
265        if (entryName == null) {
266            throw new NullPointerException("entryName == null");
267        }
268
269        ZipEntry ze = entries.get(entryName);
270        if (ze == null) {
271            ze = entries.get(entryName + "/");
272        }
273        return ze;
274    }
275
276    /**
277     * Returns an input stream on the data of the specified {@code ZipEntry}.
278     *
279     * @param entry
280     *            the ZipEntry.
281     * @return an input stream of the data contained in the {@code ZipEntry}.
282     * @throws IOException
283     *             if an {@code IOException} occurs.
284     * @throws IllegalStateException if this zip file has been closed.
285     */
286    public InputStream getInputStream(ZipEntry entry) throws IOException {
287        // Make sure this ZipEntry is in this Zip file.  We run it through the name lookup.
288        entry = getEntry(entry.getName());
289        if (entry == null) {
290            return null;
291        }
292
293        // Create an InputStream at the right part of the file.
294        RandomAccessFile localRaf = raf;
295        synchronized (localRaf) {
296            // We don't know the entry data's start position. All we have is the
297            // position of the entry's local header.
298            // http://www.pkware.com/documents/casestudies/APPNOTE.TXT
299            RAFStream rafStream = new RAFStream(localRaf, entry.localHeaderRelOffset);
300            DataInputStream is = new DataInputStream(rafStream);
301
302            final int localMagic = Integer.reverseBytes(is.readInt());
303            if (localMagic != LOCSIG) {
304                throwZipException("Local File Header", localMagic);
305            }
306
307            is.skipBytes(2);
308
309            // At position 6 we find the General Purpose Bit Flag.
310            int gpbf = Short.reverseBytes(is.readShort()) & 0xffff;
311            if ((gpbf & ZipFile.GPBF_UNSUPPORTED_MASK) != 0) {
312                throw new ZipException("Invalid General Purpose Bit Flag: " + gpbf);
313            }
314
315            // Offset 26 has the file name length, and offset 28 has the extra field length.
316            // These lengths can differ from the ones in the central header.
317            is.skipBytes(18);
318            int fileNameLength = Short.reverseBytes(is.readShort()) & 0xffff;
319            int extraFieldLength = Short.reverseBytes(is.readShort()) & 0xffff;
320            is.close();
321
322            // Skip the variable-size file name and extra field data.
323            rafStream.skip(fileNameLength + extraFieldLength);
324
325            if (entry.compressionMethod == ZipEntry.STORED) {
326                rafStream.endOffset = rafStream.offset + entry.size;
327                return rafStream;
328            } else {
329                rafStream.endOffset = rafStream.offset + entry.compressedSize;
330                int bufSize = Math.max(1024, (int) Math.min(entry.getSize(), 65535L));
331                return new ZipInflaterInputStream(rafStream, new Inflater(true), bufSize, entry);
332            }
333        }
334    }
335
    /**
     * Gets the file name of this {@code ZipFile}.
     *
     * <p>This is the value of {@link File#getPath()} for the file this {@code ZipFile} was
     * constructed with. Unlike most other accessors of this class, this method does not
     * check whether the zip file has been closed.
     *
     * @return the file name of this {@code ZipFile}.
     */
    public String getName() {
        return filename;
    }
344
    /**
     * Returns the number of {@code ZipEntries} in this {@code ZipFile}.
     *
     * <p>The count is the size of the in-memory index that was built from the central
     * directory when this {@code ZipFile} was constructed.
     *
     * @return the number of entries in this file.
     * @throws IllegalStateException if this zip file has been closed.
     */
    public int size() {
        checkNotClosed();
        return entries.size();
    }
355
356    /**
357     * Find the central directory and read the contents.
358     *
359     * <p>The central directory can be followed by a variable-length comment
360     * field, so we have to scan through it backwards.  The comment is at
361     * most 64K, plus we have 18 bytes for the end-of-central-dir stuff
362     * itself, plus apparently sometimes people throw random junk on the end
363     * just for the fun of it.
364     *
365     * <p>This is all a little wobbly.  If the wrong value ends up in the EOCD
366     * area, we're hosed. This appears to be the way that everybody handles
367     * it though, so we're in good company if this fails.
368     */
369    private void readCentralDir() throws IOException {
370        // Scan back, looking for the End Of Central Directory field. If the zip file doesn't
371        // have an overall comment (unrelated to any per-entry comments), we'll hit the EOCD
372        // on the first try.
373        // No need to synchronize raf here -- we only do this when we first open the zip file.
374        long scanOffset = raf.length() - ENDHDR;
375        if (scanOffset < 0) {
376            throw new ZipException("File too short to be a zip file: " + raf.length());
377        }
378
379        raf.seek(0);
380        final int headerMagic = Integer.reverseBytes(raf.readInt());
381        if (headerMagic == ENDSIG) {
382            throw new ZipException("Empty zip archive not supported");
383        }
384        if (headerMagic != LOCSIG) {
385            throw new ZipException("Not a zip archive");
386        }
387
388        long stopOffset = scanOffset - 65536;
389        if (stopOffset < 0) {
390            stopOffset = 0;
391        }
392
393        while (true) {
394            raf.seek(scanOffset);
395            if (Integer.reverseBytes(raf.readInt()) == ENDSIG) {
396                break;
397            }
398
399            scanOffset--;
400            if (scanOffset < stopOffset) {
401                throw new ZipException("End Of Central Directory signature not found");
402            }
403        }
404
405        // Read the End Of Central Directory. ENDHDR includes the signature bytes,
406        // which we've already read.
407        byte[] eocd = new byte[ENDHDR - 4];
408        raf.readFully(eocd);
409
410        // Pull out the information we need.
411        BufferIterator it = HeapBufferIterator.iterator(eocd, 0, eocd.length, ByteOrder.LITTLE_ENDIAN);
412        int diskNumber = it.readShort() & 0xffff;
413        int diskWithCentralDir = it.readShort() & 0xffff;
414        int numEntries = it.readShort() & 0xffff;
415        int totalNumEntries = it.readShort() & 0xffff;
416        it.skip(4); // Ignore centralDirSize.
417        long centralDirOffset = ((long) it.readInt()) & 0xffffffffL;
418        int commentLength = it.readShort() & 0xffff;
419
420        if (numEntries != totalNumEntries || diskNumber != 0 || diskWithCentralDir != 0) {
421            throw new ZipException("Spanned archives not supported");
422        }
423
424        if (commentLength > 0) {
425            byte[] commentBytes = new byte[commentLength];
426            raf.readFully(commentBytes);
427            comment = new String(commentBytes, 0, commentBytes.length, StandardCharsets.UTF_8);
428        }
429
430        // Seek to the first CDE and read all entries.
431        // We have to do this now (from the constructor) rather than lazily because the
432        // public API doesn't allow us to throw IOException except from the constructor
433        // or from getInputStream.
434        RAFStream rafStream = new RAFStream(raf, centralDirOffset);
435        BufferedInputStream bufferedStream = new BufferedInputStream(rafStream, 4096);
436        byte[] hdrBuf = new byte[CENHDR]; // Reuse the same buffer for each entry.
437        for (int i = 0; i < numEntries; ++i) {
438            ZipEntry newEntry = new ZipEntry(hdrBuf, bufferedStream, StandardCharsets.UTF_8);
439            if (newEntry.localHeaderRelOffset >= centralDirOffset) {
440                throw new ZipException("Local file header offset is after central directory");
441            }
442            String entryName = newEntry.getName();
443            if (entries.put(entryName, newEntry) != null) {
444                throw new ZipException("Duplicate entry name: " + entryName);
445            }
446        }
447    }
448
449    static void throwZipException(String msg, int magic) throws ZipException {
450        final String hexString = IntegralToString.intToHexString(magic, true, 8);
451        throw new ZipException(msg + " signature not found; was " + hexString);
452    }
453
454    /**
455     * Wrap a stream around a RandomAccessFile.  The RandomAccessFile is shared
456     * among all streams returned by getInputStream(), so we have to synchronize
457     * access to it.  (We can optimize this by adding buffering here to reduce
458     * collisions.)
459     *
460     * <p>We could support mark/reset, but we don't currently need them.
461     *
462     * @hide
463     */
464    public static class RAFStream extends InputStream {
465        private final RandomAccessFile sharedRaf;
466        private long endOffset;
467        private long offset;
468
469
470        public RAFStream(RandomAccessFile raf, long initialOffset, long endOffset) {
471            sharedRaf = raf;
472            offset = initialOffset;
473            this.endOffset = endOffset;
474        }
475
476        public RAFStream(RandomAccessFile raf, long initialOffset) throws IOException {
477            this(raf, initialOffset, raf.length());
478        }
479
480        @Override public int available() throws IOException {
481            return (offset < endOffset ? 1 : 0);
482        }
483
484        @Override public int read() throws IOException {
485            return Streams.readSingleByte(this);
486        }
487
488        @Override public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException {
489            synchronized (sharedRaf) {
490                final long length = endOffset - offset;
491                if (byteCount > length) {
492                    byteCount = (int) length;
493                }
494                sharedRaf.seek(offset);
495                int count = sharedRaf.read(buffer, byteOffset, byteCount);
496                if (count > 0) {
497                    offset += count;
498                    return count;
499                } else {
500                    return -1;
501                }
502            }
503        }
504
505        @Override public long skip(long byteCount) throws IOException {
506            if (byteCount > endOffset - offset) {
507                byteCount = endOffset - offset;
508            }
509            offset += byteCount;
510            return byteCount;
511        }
512
513        public int fill(Inflater inflater, int nativeEndBufSize) throws IOException {
514            synchronized (sharedRaf) {
515                int len = Math.min((int) (endOffset - offset), nativeEndBufSize);
516                int cnt = inflater.setFileInput(sharedRaf.getFD(), offset, nativeEndBufSize);
517                // setFileInput read from the file, so we need to get the OS and RAFStream back
518                // in sync...
519                skip(cnt);
520                return len;
521            }
522        }
523    }
524
525    /** @hide */
526    public static class ZipInflaterInputStream extends InflaterInputStream {
527        private final ZipEntry entry;
528        private long bytesRead = 0;
529
530        public ZipInflaterInputStream(InputStream is, Inflater inf, int bsize, ZipEntry entry) {
531            super(is, inf, bsize);
532            this.entry = entry;
533        }
534
535        @Override public int read(byte[] buffer, int byteOffset, int byteCount) throws IOException {
536            final int i;
537            try {
538                i = super.read(buffer, byteOffset, byteCount);
539            } catch (IOException e) {
540                throw new IOException("Error reading data for " + entry.getName() + " near offset "
541                        + bytesRead, e);
542            }
543            if (i == -1) {
544                if (entry.size != bytesRead) {
545                    throw new IOException("Size mismatch on inflated file: " + bytesRead + " vs "
546                            + entry.size);
547                }
548            } else {
549                bytesRead += i;
550            }
551            return i;
552        }
553
554        @Override public int available() throws IOException {
555            if (closed) {
556                // Our superclass will throw an exception, but there's a jtreg test that
557                // explicitly checks that the InputStream returned from ZipFile.getInputStream
558                // returns 0 even when closed.
559                return 0;
560            }
561            return super.available() == 0 ? 0 : (int) (entry.getSize() - bytesRead);
562        }
563    }
564}
565