ZipFileRO.h revision edbf3b6af777b721cd2a1ef461947e51e88241e1
/*
 * Copyright (C) 2007 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
//
// Read-only access to Zip archives, with minimal heap allocation.
//
// This is similar to the more-complete ZipFile class, but no attempt
// has been made to make them interchangeable.  This class operates under
// a very different set of assumptions and constraints.
//
24#ifndef __LIBS_ZIPFILERO_H
25#define __LIBS_ZIPFILERO_H
26
27#include "Errors.h"
28#include "FileMap.h"
29
30#include <stdio.h>
31#include <stdlib.h>
32#include <unistd.h>
33
34namespace android {
35
/*
 * Opaque handle identifying one entry in a ZipFileRO archive.
 *
 * This is a trivial typedef whose only purpose is to ensure that a
 * ZipEntryRO is not treated as a simple integer.  NULL indicates an
 * invalid value.
 */
typedef void* ZipEntryRO;
41
42/*
43 * Open a Zip archive for reading.
44 *
45 * We want "open" and "find entry by name" to be fast operations, and we
46 * want to use as little memory as possible.  We memory-map the file,
47 * and load a hash table with pointers to the filenames (which aren't
48 * null-terminated).  The other fields are at a fixed offset from the
49 * filename, so we don't need to extract those (but we do need to byte-read
50 * and endian-swap them every time we want them).
51 *
52 * To speed comparisons when doing a lookup by name, we could make the mapping
53 * "private" (copy-on-write) and null-terminate the filenames after verifying
54 * the record structure.  However, this requires a private mapping of
55 * every page that the Central Directory touches.  Easier to tuck a copy
56 * of the string length into the hash table entry.
57 */
58class ZipFileRO {
59public:
60    ZipFileRO()
61        : mFd(-1), mFileMap(NULL), mHashTableSize(-1), mHashTable(NULL)
62        {}
63    ~ZipFileRO() {
64        free(mHashTable);
65        if (mFileMap)
66            mFileMap->release();
67        if (mFd >= 0)
68            close(mFd);
69    }
70
71    /*
72     * Open an archive.
73     */
74    status_t open(const char* zipFileName);
75
76    /*
77     * Find an entry, by name.  Returns the entry identifier, or NULL if
78     * not found.
79     *
80     * If two entries have the same name, one will be chosen at semi-random.
81     */
82    ZipEntryRO findEntryByName(const char* fileName) const;
83
84    /*
85     * Return the #of entries in the Zip archive.
86     */
87    int getNumEntries(void) const {
88        return mNumEntries;
89    }
90
91    /*
92     * Return the Nth entry.  Zip file entries are not stored in sorted
93     * order, and updated entries may appear at the end, so anyone walking
94     * the archive needs to avoid making ordering assumptions.  We take
95     * that further by returning the Nth non-empty entry in the hash table
96     * rather than the Nth entry in the archive.
97     *
98     * Valid values are [0..numEntries).
99     *
100     * [This is currently O(n).  If it needs to be fast we can allocate an
101     * additional data structure or provide an iterator interface.]
102     */
103    ZipEntryRO findEntryByIndex(int idx) const;
104
105    /*
106     * Copy the filename into the supplied buffer.  Returns 0 on success,
107     * -1 if "entry" is invalid, or the filename length if it didn't fit.  The
108     * length, and the returned string, include the null-termination.
109     */
110    int getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) const;
111
112    /*
113     * Get the vital stats for an entry.  Pass in NULL pointers for anything
114     * you don't need.
115     *
116     * "*pOffset" holds the Zip file offset of the entry's data.
117     *
118     * Returns "false" if "entry" is bogus or if the data in the Zip file
119     * appears to be bad.
120     */
121    bool getEntryInfo(ZipEntryRO entry, int* pMethod, long* pUncompLen,
122        long* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) const;
123
124    /*
125     * Create a new FileMap object that maps a subset of the archive.  For
126     * an uncompressed entry this effectively provides a pointer to the
127     * actual data, for a compressed entry this provides the input buffer
128     * for inflate().
129     */
130    FileMap* createEntryFileMap(ZipEntryRO entry) const;
131
132    /*
133     * Uncompress the data into a buffer.  Depending on the compression
134     * format, this is either an "inflate" operation or a memcpy.
135     *
136     * Use "uncompLen" from getEntryInfo() to determine the required
137     * buffer size.
138     *
139     * Returns "true" on success.
140     */
141    bool uncompressEntry(ZipEntryRO entry, void* buffer) const;
142
143    /*
144     * Uncompress the data to an open file descriptor.
145     */
146    bool uncompressEntry(ZipEntryRO entry, int fd) const;
147
148    /* Zip compression methods we support */
149    enum {
150        kCompressStored     = 0,        // no compression
151        kCompressDeflated   = 8,        // standard deflate
152    };
153
154    /*
155     * Utility function: uncompress deflated data, buffer to buffer.
156     */
157    static bool inflateBuffer(void* outBuf, const void* inBuf,
158        long uncompLen, long compLen);
159
160    /*
161     * Utility function: uncompress deflated data, buffer to fd.
162     */
163    static bool inflateBuffer(int fd, const void* inBuf,
164        long uncompLen, long compLen);
165
166    /*
167     * Some basic functions for raw data manipulation.  "LE" means
168     * Little Endian.
169     */
170    static inline unsigned short get2LE(const unsigned char* buf) {
171        return buf[0] | (buf[1] << 8);
172    }
173    static inline unsigned long get4LE(const unsigned char* buf) {
174        return buf[0] | (buf[1] << 8) | (buf[2] << 16) | (buf[3] << 24);
175    }
176
177private:
178    /* these are private and not defined */
179    ZipFileRO(const ZipFileRO& src);
180    ZipFileRO& operator=(const ZipFileRO& src);
181
182    /* parse the archive, prepping internal structures */
183    bool parseZipArchive(void);
184
185    /* add a new entry to the hash table */
186    void addToHash(const char* str, int strLen, unsigned int hash);
187
188    /* compute string hash code */
189    static unsigned int computeHash(const char* str, int len);
190
191    /* convert a ZipEntryRO back to a hash table index */
192    int entryToIndex(const ZipEntryRO entry) const;
193
194    /*
195     * One entry in the hash table.
196     */
197    typedef struct HashEntry {
198        const char*     name;
199        unsigned short  nameLen;
200        //unsigned int    hash;
201    } HashEntry;
202
203    /* open Zip archive */
204    int         mFd;
205
206    /* mapped file */
207    FileMap*    mFileMap;
208
209    /* number of entries in the Zip archive */
210    int         mNumEntries;
211
212    /*
213     * We know how many entries are in the Zip archive, so we have a
214     * fixed-size hash table.  We probe for an empty slot.
215     */
216    int         mHashTableSize;
217    HashEntry*  mHashTable;
218};
219
220}; // namespace android
221
222#endif /*__LIBS_ZIPFILERO_H*/
223