1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/*
17 * Read-only access to Zip archives, with minimal heap allocation.
18 */
19#include "ZipArchive.h"
20
21#include <zlib.h>
22
23#include <stdlib.h>
24#include <string.h>
25#include <fcntl.h>
26#include <errno.h>
27
28
/*
 * Zip file constants.
 *
 * Prefixes: EOCD = end-of-central-directory record, LFH = local file
 * header, CDE = central directory entry.  The k*Signature values are the
 * 4-byte markers that get4LE() results are compared against.  The k*Len
 * values are the sizes of the fixed-length part of each record.  The other
 * values are byte offsets of individual fields, measured from the start of
 * the record and read with get2LE() or get4LE().
 */
#define kEOCDSignature      0x06054b50
#define kEOCDLen            22
#define kEOCDNumEntries     8               // offset to #of entries in file
#define kEOCDFileOffset     16              // offset to central directory

/*
 * NOTE(review): kMaxEOCDSearch is not referenced by the code in this part
 * of the file; the EOCD scan in parseZipArchive() walks back to the start
 * of the mapping instead.  Confirm whether it is still needed.
 */
#define kMaxCommentLen      65535           // longest possible in ushort
#define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen)

#define kLFHSignature       0x04034b50
#define kLFHLen             30              // excluding variable-len fields
#define kLFHNameLen         26              // offset to filename length
#define kLFHExtraLen        28              // offset to extra length

#define kCDESignature       0x02014b50
#define kCDELen             46              // excluding variable-len fields
#define kCDEMethod          10              // offset to compression method
#define kCDEModWhen         12              // offset to modification timestamp
#define kCDECRC             16              // offset to entry CRC
#define kCDECompLen         20              // offset to compressed length
#define kCDEUncompLen       24              // offset to uncompressed length
#define kCDENameLen         28              // offset to filename length
#define kCDEExtraLen        30              // offset to extra length
#define kCDECommentLen      32              // offset to comment length
#define kCDELocalOffset     42              // offset to local hdr
56
/*
 * The values we return for ZipEntry use 0 as an invalid value, so we
 * want to adjust the hash table index by a fixed amount.  Using a large
 * value helps ensure that people don't mix & match arguments, e.g. with
 * entry indices.
 *
 * dexZipFindEntry() adds this bias when it hands out a ZipEntry and
 * entryToIndex() subtracts it again.
 */
#define kZipEntryAdj        10000
64
65/*
66 * Convert a ZipEntry to a hash table index, verifying that it's in a
67 * valid range.
68 */
69static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry)
70{
71    long ent = ((long) entry) - kZipEntryAdj;
72    if (ent < 0 || ent >= pArchive->mHashTableSize ||
73        pArchive->mHashTable[ent].name == NULL)
74    {
75        LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent);
76        return -1;
77    }
78    return ent;
79}
80
/*
 * Hash "len" bytes starting at "str".  The data need not be
 * null-terminated, and embedded zero bytes take part in the hash.
 *
 * Each step multiplies the running value by 31 and adds the next byte;
 * unsigned arithmetic wraps on overflow.
 */
static unsigned int computeHash(const char* str, int len)
{
    unsigned int acc = 0;

    for (; len != 0; len--) {
        acc = acc * 31 + *str;
        str++;
    }

    return acc;
}
93
94/*
95 * Add a new entry to the hash table.
96 */
97static void addToHash(ZipArchive* pArchive, const char* str, int strLen,
98    unsigned int hash)
99{
100    const int hashTableSize = pArchive->mHashTableSize;
101    int ent = hash & (hashTableSize - 1);
102
103    /*
104     * We over-allocated the table, so we're guaranteed to find an empty slot.
105     */
106    while (pArchive->mHashTable[ent].name != NULL)
107        ent = (ent + 1) & (hashTableSize-1);
108
109    pArchive->mHashTable[ent].name = str;
110    pArchive->mHashTable[ent].nameLen = strLen;
111}
112
113/*
114 * Get 2 little-endian bytes.
115 */
116static u2 get2LE(unsigned char const* pSrc)
117{
118    return pSrc[0] | (pSrc[1] << 8);
119}
120
121/*
122 * Get 4 little-endian bytes.
123 */
124static u4 get4LE(unsigned char const* pSrc)
125{
126    u4 result;
127
128    result = pSrc[0];
129    result |= pSrc[1] << 8;
130    result |= pSrc[2] << 16;
131    result |= pSrc[3] << 24;
132
133    return result;
134}
135
136/*
137 * Parse the Zip archive, verifying its contents and initializing internal
138 * data structures.
139 */
140static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap)
141{
142#define CHECK_OFFSET(_off) {                                                \
143        if ((unsigned int) (_off) >= maxOffset) {                           \
144            LOGE("ERROR: bad offset %u (max %d): %s\n",                     \
145                (unsigned int) (_off), maxOffset, #_off);                   \
146            goto bail;                                                      \
147        }                                                                   \
148    }
149    bool result = false;
150    const unsigned char* basePtr = (const unsigned char*)pMap->addr;
151    const unsigned char* ptr;
152    size_t length = pMap->length;
153    unsigned int i, numEntries, cdOffset;
154    unsigned int val;
155
156    /*
157     * The first 4 bytes of the file will either be the local header
158     * signature for the first file (kLFHSignature) or, if the archive doesn't
159     * have any files in it, the end-of-central-directory signature
160     * (kEOCDSignature).
161     */
162    val = get4LE(basePtr);
163    if (val == kEOCDSignature) {
164        LOGI("Found Zip archive, but it looks empty\n");
165        goto bail;
166    } else if (val != kLFHSignature) {
167        LOGV("Not a Zip archive (found 0x%08x)\n", val);
168        goto bail;
169    }
170
171    /*
172     * Find the EOCD.  We'll find it immediately unless they have a file
173     * comment.
174     */
175    ptr = basePtr + length - kEOCDLen;
176
177    while (ptr >= basePtr) {
178        if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature)
179            break;
180        ptr--;
181    }
182    if (ptr < basePtr) {
183        LOGI("Could not find end-of-central-directory in Zip\n");
184        goto bail;
185    }
186
187    /*
188     * There are two interesting items in the EOCD block: the number of
189     * entries in the file, and the file offset of the start of the
190     * central directory.
191     *
192     * (There's actually a count of the #of entries in this file, and for
193     * all files which comprise a spanned archive, but for our purposes
194     * we're only interested in the current file.  Besides, we expect the
195     * two to be equivalent for our stuff.)
196     */
197    numEntries = get2LE(ptr + kEOCDNumEntries);
198    cdOffset = get4LE(ptr + kEOCDFileOffset);
199
200    /* valid offsets are [0,EOCD] */
201    unsigned int maxOffset;
202    maxOffset = (ptr - basePtr) +1;
203
204    LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
205    if (numEntries == 0 || cdOffset >= length) {
206        LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
207            numEntries, cdOffset, length);
208        goto bail;
209    }
210
211    /*
212     * Create hash table.  We have a minimum 75% load factor, possibly as
213     * low as 50% after we round off to a power of 2.  There must be at
214     * least one unused entry to avoid an infinite loop during creation.
215     */
216    pArchive->mNumEntries = numEntries;
217    pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3);
218    pArchive->mHashTable = (ZipHashEntry*)
219            calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry));
220
221    /*
222     * Walk through the central directory, adding entries to the hash
223     * table.
224     */
225    ptr = basePtr + cdOffset;
226    for (i = 0; i < numEntries; i++) {
227        unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
228        const unsigned char* localHdr;
229        unsigned int hash;
230
231        if (get4LE(ptr) != kCDESignature) {
232            LOGW("Missed a central dir sig (at %d)\n", i);
233            goto bail;
234        }
235        if (ptr + kCDELen > basePtr + length) {
236            LOGW("Ran off the end (at %d)\n", i);
237            goto bail;
238        }
239
240        localHdrOffset = get4LE(ptr + kCDELocalOffset);
241        CHECK_OFFSET(localHdrOffset);
242        fileNameLen = get2LE(ptr + kCDENameLen);
243        extraLen = get2LE(ptr + kCDEExtraLen);
244        commentLen = get2LE(ptr + kCDECommentLen);
245
246        //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n",
247        //    i, localHdrOffset, fileNameLen, extraLen, commentLen);
248        //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen);
249
250        /* add the CDE filename to the hash table */
251        hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
252        addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash);
253
254        localHdr = basePtr + localHdrOffset;
255        if (get4LE(localHdr) != kLFHSignature) {
256            LOGW("Bad offset to local header: %d (at %d)\n",
257                localHdrOffset, i);
258            goto bail;
259        }
260
261        ptr += kCDELen + fileNameLen + extraLen + commentLen;
262        CHECK_OFFSET(ptr - basePtr);
263    }
264
265    result = true;
266
267bail:
268    return result;
269#undef CHECK_OFFSET
270}
271
272/*
273 * Open the specified file read-only.  We memory-map the entire thing and
274 * parse the contents.
275 *
276 * This will be called on non-Zip files, especially during VM startup, so
277 * we don't want to be too noisy about certain types of failure.  (Do
278 * we want a "quiet" flag?)
279 *
280 * On success, we fill out the contents of "pArchive" and return 0.
281 */
282int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive)
283{
284    int fd, err;
285
286    LOGV("Opening archive '%s' %p\n", fileName, pArchive);
287
288    memset(pArchive, 0, sizeof(ZipArchive));
289
290    fd = open(fileName, O_RDONLY, 0);
291    if (fd < 0) {
292        err = errno ? errno : -1;
293        LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
294        return err;
295    }
296
297    return dexZipPrepArchive(fd, fileName, pArchive);
298}
299
300/*
301 * Prepare to access a ZipArchive in an open file descriptor.
302 */
303int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive)
304{
305    MemMapping map;
306    int err;
307
308    map.addr = NULL;
309    memset(pArchive, 0, sizeof(*pArchive));
310
311    pArchive->mFd = fd;
312
313    if (sysMapFileInShmemReadOnly(pArchive->mFd, &map) != 0) {
314        err = -1;
315        LOGW("Map of '%s' failed\n", debugFileName);
316        goto bail;
317    }
318
319    if (map.length < kEOCDLen) {
320        err = -1;
321        LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length);
322        goto bail;
323    }
324
325    if (!parseZipArchive(pArchive, &map)) {
326        err = -1;
327        LOGV("Parsing '%s' failed\n", debugFileName);
328        goto bail;
329    }
330
331    /* success */
332    err = 0;
333    sysCopyMap(&pArchive->mMap, &map);
334    map.addr = NULL;
335
336bail:
337    if (err != 0)
338        dexZipCloseArchive(pArchive);
339    if (map.addr != NULL)
340        sysReleaseShmem(&map);
341    return err;
342}
343
344
345/*
346 * Close a ZipArchive, closing the file and freeing the contents.
347 *
348 * NOTE: the ZipArchive may not have been fully created.
349 */
350void dexZipCloseArchive(ZipArchive* pArchive)
351{
352    LOGV("Closing archive %p\n", pArchive);
353
354    if (pArchive->mFd >= 0)
355        close(pArchive->mFd);
356
357    sysReleaseShmem(&pArchive->mMap);
358
359    free(pArchive->mHashTable);
360
361    pArchive->mFd = -1;
362    pArchive->mNumEntries = -1;
363    pArchive->mHashTableSize = -1;
364    pArchive->mHashTable = NULL;
365}
366
367
368/*
369 * Find a matching entry.
370 *
371 * Returns 0 if not found.
372 */
373ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName)
374{
375    int nameLen = strlen(entryName);
376    unsigned int hash = computeHash(entryName, nameLen);
377    const int hashTableSize = pArchive->mHashTableSize;
378    int ent = hash & (hashTableSize-1);
379
380    while (pArchive->mHashTable[ent].name != NULL) {
381        if (pArchive->mHashTable[ent].nameLen == nameLen &&
382            memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0)
383        {
384            /* match */
385            return (ZipEntry) (ent + kZipEntryAdj);
386        }
387
388        ent = (ent + 1) & (hashTableSize-1);
389    }
390
391    return NULL;
392}
393
#if 0
/*
 * Return the entry that occupies the idx'th non-empty slot of the hash
 * table (0-based), or NULL if "idx" is out of range.
 *
 * The table is sparse, so this is a linear scan that counts occupied
 * slots.  If it ever needs to be fast, a parallel lookup table or an
 * iterator interface would be better.
 *
 * This function is currently compiled out.
 */
ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx)
{
    int slot, remaining;

    if (idx < 0 || idx >= pArchive->mNumEntries) {
        LOGW("Invalid index %d\n", idx);
        return NULL;
    }

    remaining = idx;
    for (slot = 0; slot < pArchive->mHashTableSize; slot++) {
        if (pArchive->mHashTable[slot].name == NULL)
            continue;
        if (remaining == 0)
            return (ZipEntry) (slot + kZipEntryAdj);
        remaining--;
    }

    return NULL;
}
#endif
420
421/*
422 * Get the useful fields from the zip entry.
423 *
424 * Returns "false" if the offsets to the fields or the contents of the fields
425 * appear to be bogus.
426 */
427bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry,
428    int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset,
429    long* pModWhen, long* pCrc32)
430{
431    int ent = entryToIndex(pArchive, entry);
432    if (ent < 0)
433        return false;
434
435    /*
436     * Recover the start of the central directory entry from the filename
437     * pointer.
438     */
439    const unsigned char* basePtr = (const unsigned char*)
440        pArchive->mMap.addr;
441    const unsigned char* ptr = (const unsigned char*)
442        pArchive->mHashTable[ent].name;
443    size_t zipLength =
444        pArchive->mMap.length;
445
446    ptr -= kCDELen;
447
448    int method = get2LE(ptr + kCDEMethod);
449    if (pMethod != NULL)
450        *pMethod = method;
451
452    if (pModWhen != NULL)
453        *pModWhen = get4LE(ptr + kCDEModWhen);
454    if (pCrc32 != NULL)
455        *pCrc32 = get4LE(ptr + kCDECRC);
456
457    /*
458     * We need to make sure that the lengths are not so large that somebody
459     * trying to map the compressed or uncompressed data runs off the end
460     * of the mapped region.
461     */
462    unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset);
463    if (localHdrOffset + kLFHLen >= zipLength) {
464        LOGE("ERROR: bad local hdr offset in zip\n");
465        return false;
466    }
467    const unsigned char* localHdr = basePtr + localHdrOffset;
468    off_t dataOffset = localHdrOffset + kLFHLen
469        + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen);
470    if ((unsigned long) dataOffset >= zipLength) {
471        LOGE("ERROR: bad data offset in zip\n");
472        return false;
473    }
474
475    if (pCompLen != NULL) {
476        *pCompLen = get4LE(ptr + kCDECompLen);
477        if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) {
478            LOGE("ERROR: bad compressed length in zip\n");
479            return false;
480        }
481    }
482    if (pUncompLen != NULL) {
483        *pUncompLen = get4LE(ptr + kCDEUncompLen);
484        if (*pUncompLen < 0) {
485            LOGE("ERROR: negative uncompressed length in zip\n");
486            return false;
487        }
488        if (method == kCompressStored &&
489            (size_t)(dataOffset + *pUncompLen) >= zipLength)
490        {
491            LOGE("ERROR: bad uncompressed length in zip\n");
492            return false;
493        }
494    }
495
496    if (pOffset != NULL) {
497        *pOffset = dataOffset;
498    }
499    return true;
500}
501
502/*
503 * Uncompress "deflate" data from one buffer to an open file descriptor.
504 */
505static bool inflateToFile(int fd, const void* inBuf, long uncompLen,
506    long compLen)
507{
508    bool result = false;
509    const int kWriteBufSize = 32768;
510    unsigned char writeBuf[kWriteBufSize];
511    z_stream zstream;
512    int zerr;
513
514    /*
515     * Initialize the zlib stream struct.
516     */
517	memset(&zstream, 0, sizeof(zstream));
518    zstream.zalloc = Z_NULL;
519    zstream.zfree = Z_NULL;
520    zstream.opaque = Z_NULL;
521    zstream.next_in = (Bytef*)inBuf;
522    zstream.avail_in = compLen;
523    zstream.next_out = (Bytef*) writeBuf;
524    zstream.avail_out = sizeof(writeBuf);
525    zstream.data_type = Z_UNKNOWN;
526
527	/*
528	 * Use the undocumented "negative window bits" feature to tell zlib
529	 * that there's no zlib header waiting for it.
530	 */
531    zerr = inflateInit2(&zstream, -MAX_WBITS);
532    if (zerr != Z_OK) {
533        if (zerr == Z_VERSION_ERROR) {
534            LOGE("Installed zlib is not compatible with linked version (%s)\n",
535                ZLIB_VERSION);
536        } else {
537            LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
538        }
539        goto bail;
540    }
541
542    /*
543     * Loop while we have more to do.
544     */
545    do {
546        /*
547         * Expand data.
548         */
549        zerr = inflate(&zstream, Z_NO_FLUSH);
550        if (zerr != Z_OK && zerr != Z_STREAM_END) {
551            LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
552                zerr, zstream.next_in, zstream.avail_in,
553                zstream.next_out, zstream.avail_out);
554            goto z_bail;
555        }
556
557        /* write when we're full or when we're done */
558        if (zstream.avail_out == 0 ||
559            (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
560        {
561            long writeSize = zstream.next_out - writeBuf;
562            int cc = write(fd, writeBuf, writeSize);
563            if (cc != (int) writeSize) {
564                if (cc < 0) {
565                    LOGW("write failed in inflate: %s\n", strerror(errno));
566                } else {
567                    LOGW("partial write in inflate (%d vs %ld)\n",
568                        cc, writeSize);
569                }
570                goto z_bail;
571            }
572
573            zstream.next_out = writeBuf;
574            zstream.avail_out = sizeof(writeBuf);
575        }
576    } while (zerr == Z_OK);
577
578    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
579
580    /* paranoia */
581    if ((long) zstream.total_out != uncompLen) {
582        LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
583            zstream.total_out, uncompLen);
584        goto z_bail;
585    }
586
587    result = true;
588
589z_bail:
590    inflateEnd(&zstream);        /* free up any allocated structures */
591
592bail:
593    return result;
594}
595
596/*
597 * Uncompress an entry, in its entirety, to an open file descriptor.
598 *
599 * TODO: this doesn't verify the data's CRC, but probably should (especially
600 * for uncompressed data).
601 */
602bool dexZipExtractEntryToFile(const ZipArchive* pArchive,
603    const ZipEntry entry, int fd)
604{
605    bool result = false;
606    int ent = entryToIndex(pArchive, entry);
607    if (ent < 0)
608        return -1;
609
610    const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr;
611    int method;
612    long uncompLen, compLen;
613    off_t offset;
614
615    if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen,
616            &offset, NULL, NULL))
617    {
618        goto bail;
619    }
620
621    if (method == kCompressStored) {
622        ssize_t actual;
623
624        actual = write(fd, basePtr + offset, uncompLen);
625        if (actual < 0) {
626            LOGE("Write failed: %s\n", strerror(errno));
627            goto bail;
628        } else if (actual != uncompLen) {
629            LOGE("Partial write during uncompress (%d of %ld)\n",
630                (int) actual, uncompLen);
631            goto bail;
632        } else {
633            LOGI("+++ successful write\n");
634        }
635    } else {
636        if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen))
637            goto bail;
638    }
639
640    result = true;
641
642bail:
643    return result;
644}
645
646