1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 *
20 * This is similar to the more-complete ZipFile class, but no attempt
21 * has been made to make them interchangeable.  This class operates under
22 * a very different set of assumptions and constraints.
23 *
 * One such assumption is that if you're getting file descriptors for
 * use with this class as a child of a fork() operation, you must be on
 * a platform that implements pread() to guarantee correct operation. This
 * is because pread() can atomically read at a file offset without worrying
 * about a lock around an lseek() + read() pair.
29 */
30#ifndef __LIBS_ZIPFILERO_H
31#define __LIBS_ZIPFILERO_H
32
33#include <utils/Errors.h>
34#include <utils/FileMap.h>
35#include <utils/threads.h>
36
37#include <stdio.h>
38#include <stdlib.h>
39#include <unistd.h>
40
41namespace android {
42
/*
 * Opaque handle identifying a single entry in an archive.  It is declared
 * as a pointer type so that it cannot be confused with a plain integer;
 * NULL is the "invalid entry" value.
 */
typedef void *ZipEntryRO;
48
49/*
50 * Open a Zip archive for reading.
51 *
52 * We want "open" and "find entry by name" to be fast operations, and we
53 * want to use as little memory as possible.  We memory-map the file,
54 * and load a hash table with pointers to the filenames (which aren't
55 * null-terminated).  The other fields are at a fixed offset from the
56 * filename, so we don't need to extract those (but we do need to byte-read
57 * and endian-swap them every time we want them).
58 *
59 * To speed comparisons when doing a lookup by name, we could make the mapping
60 * "private" (copy-on-write) and null-terminate the filenames after verifying
61 * the record structure.  However, this requires a private mapping of
62 * every page that the Central Directory touches.  Easier to tuck a copy
63 * of the string length into the hash table entry.
64 *
65 * NOTE: If this is used on file descriptors inherited from a fork() operation,
66 * you must be on a platform that implements pread() to guarantee correctness
67 * on the shared file descriptors.
68 */
69class ZipFileRO {
70public:
71    ZipFileRO()
72        : mFd(-1), mFileName(NULL), mFileLength(-1),
73          mDirectoryMap(NULL),
74          mNumEntries(-1), mDirectoryOffset(-1),
75          mHashTableSize(-1), mHashTable(NULL)
76        {}
77
78    ~ZipFileRO();
79
80    /*
81     * Open an archive.
82     */
83    status_t open(const char* zipFileName);
84
85    /*
86     * Find an entry, by name.  Returns the entry identifier, or NULL if
87     * not found.
88     *
89     * If two entries have the same name, one will be chosen at semi-random.
90     */
91    ZipEntryRO findEntryByName(const char* fileName) const;
92
93    /*
94     * Return the #of entries in the Zip archive.
95     */
96    int getNumEntries(void) const {
97        return mNumEntries;
98    }
99
100    /*
101     * Return the Nth entry.  Zip file entries are not stored in sorted
102     * order, and updated entries may appear at the end, so anyone walking
103     * the archive needs to avoid making ordering assumptions.  We take
104     * that further by returning the Nth non-empty entry in the hash table
105     * rather than the Nth entry in the archive.
106     *
107     * Valid values are [0..numEntries).
108     *
109     * [This is currently O(n).  If it needs to be fast we can allocate an
110     * additional data structure or provide an iterator interface.]
111     */
112    ZipEntryRO findEntryByIndex(int idx) const;
113
114    /*
115     * Copy the filename into the supplied buffer.  Returns 0 on success,
116     * -1 if "entry" is invalid, or the filename length if it didn't fit.  The
117     * length, and the returned string, include the null-termination.
118     */
119    int getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) const;
120
121    /*
122     * Get the vital stats for an entry.  Pass in NULL pointers for anything
123     * you don't need.
124     *
125     * "*pOffset" holds the Zip file offset of the entry's data.
126     *
127     * Returns "false" if "entry" is bogus or if the data in the Zip file
128     * appears to be bad.
129     */
130    bool getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
131        size_t* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) const;
132
133    /*
134     * Create a new FileMap object that maps a subset of the archive.  For
135     * an uncompressed entry this effectively provides a pointer to the
136     * actual data, for a compressed entry this provides the input buffer
137     * for inflate().
138     */
139    FileMap* createEntryFileMap(ZipEntryRO entry) const;
140
141    /*
142     * Uncompress the data into a buffer.  Depending on the compression
143     * format, this is either an "inflate" operation or a memcpy.
144     *
145     * Use "uncompLen" from getEntryInfo() to determine the required
146     * buffer size.
147     *
148     * Returns "true" on success.
149     */
150    bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
151
152    /*
153     * Uncompress the data to an open file descriptor.
154     */
155    bool uncompressEntry(ZipEntryRO entry, int fd) const;
156
157    /* Zip compression methods we support */
158    enum {
159        kCompressStored     = 0,        // no compression
160        kCompressDeflated   = 8,        // standard deflate
161    };
162
163    /*
164     * Utility function: uncompress deflated data, buffer to buffer.
165     */
166    static bool inflateBuffer(void* outBuf, const void* inBuf,
167        size_t uncompLen, size_t compLen);
168
169    /*
170     * Utility function: uncompress deflated data, buffer to fd.
171     */
172    static bool inflateBuffer(int fd, const void* inBuf,
173        size_t uncompLen, size_t compLen);
174
175    /*
176     * Some basic functions for raw data manipulation.  "LE" means
177     * Little Endian.
178     */
179    static inline unsigned short get2LE(const unsigned char* buf) {
180        return buf[0] | (buf[1] << 8);
181    }
182    static inline unsigned long get4LE(const unsigned char* buf) {
183        return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
184    }
185
186private:
187    /* these are private and not defined */
188    ZipFileRO(const ZipFileRO& src);
189    ZipFileRO& operator=(const ZipFileRO& src);
190
191    /* locate and parse the central directory */
192    bool mapCentralDirectory(void);
193
194    /* parse the archive, prepping internal structures */
195    bool parseZipArchive(void);
196
197    /* add a new entry to the hash table */
198    void addToHash(const char* str, int strLen, unsigned int hash);
199
200    /* compute string hash code */
201    static unsigned int computeHash(const char* str, int len);
202
203    /* convert a ZipEntryRO back to a hash table index */
204    int entryToIndex(const ZipEntryRO entry) const;
205
206    /*
207     * One entry in the hash table.
208     */
209    typedef struct HashEntry {
210        const char*     name;
211        unsigned short  nameLen;
212        //unsigned int    hash;
213    } HashEntry;
214
215    /* open Zip archive */
216    int         mFd;
217
218    /* Lock for handling the file descriptor (seeks, etc) */
219    mutable Mutex mFdLock;
220
221    /* zip file name */
222    char*       mFileName;
223
224    /* length of file */
225    size_t      mFileLength;
226
227    /* mapped file */
228    FileMap*    mDirectoryMap;
229
230    /* number of entries in the Zip archive */
231    int         mNumEntries;
232
233    /* CD directory offset in the Zip archive */
234    off_t       mDirectoryOffset;
235
236    /*
237     * We know how many entries are in the Zip archive, so we have a
238     * fixed-size hash table.  We probe for an empty slot.
239     */
240    int         mHashTableSize;
241    HashEntry*  mHashTable;
242};
243
244}; // namespace android
245
246#endif /*__LIBS_ZIPFILERO_H*/
247