ZipFileRO.cpp revision 16c4d154dca43c662571129af31b27433b919a32
1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//
18// Read-only access to Zip archives, with minimal heap allocation.
19//
20#define LOG_TAG "zipro"
21//#define LOG_NDEBUG 0
22#include <androidfw/ZipFileRO.h>
23#include <utils/Log.h>
24#include <utils/Compat.h>
25#include <utils/misc.h>
26#include <utils/threads.h>
27
28#include <zlib.h>
29
30#include <string.h>
31#include <fcntl.h>
32#include <errno.h>
33#include <assert.h>
34#include <unistd.h>
35
36/*
37 * We must open binary files using open(path, ... | O_BINARY) under Windows.
38 * Otherwise strange read errors will happen.
39 */
40#ifndef O_BINARY
41#  define O_BINARY  0
42#endif
43
44using namespace android;
45
46/*
47 * Zip file constants.
48 */
49#define kEOCDSignature       0x06054b50
50#define kEOCDLen             22
51#define kEOCDDiskNumber      4               // number of the current disk
52#define kEOCDDiskNumberForCD 6               // disk number with the Central Directory
53#define kEOCDNumEntries      8               // offset to #of entries in file
54#define kEOCDTotalNumEntries 10              // offset to total #of entries in spanned archives
55#define kEOCDSize            12              // size of the central directory
56#define kEOCDFileOffset      16              // offset to central directory
57#define kEOCDCommentSize     20              // offset to the length of the file comment
58
59#define kMaxCommentLen       65535           // longest possible in ushort
60#define kMaxEOCDSearch       (kMaxCommentLen + kEOCDLen)
61
62#define kLFHSignature        0x04034b50
63#define kLFHLen              30              // excluding variable-len fields
64#define kLFHGPBFlags          6              // offset to GPB flags
65#define kLFHNameLen          26              // offset to filename length
66#define kLFHExtraLen         28              // offset to extra length
67
68#define kCDESignature        0x02014b50
69#define kCDELen              46              // excluding variable-len fields
70#define kCDEGPBFlags          8              // offset to GPB flags
71#define kCDEMethod           10              // offset to compression method
72#define kCDEModWhen          12              // offset to modification timestamp
73#define kCDECRC              16              // offset to entry CRC
74#define kCDECompLen          20              // offset to compressed length
75#define kCDEUncompLen        24              // offset to uncompressed length
76#define kCDENameLen          28              // offset to filename length
77#define kCDEExtraLen         30              // offset to extra length
78#define kCDECommentLen       32              // offset to comment length
79#define kCDELocalOffset      42              // offset to local hdr
80
81/* General Purpose Bit Flag */
82#define kGPFEncryptedFlag    (1 << 0)
83#define kGPFUnsupportedMask  (kGPFEncryptedFlag)
84
/*
 * The values we return for ZipEntryRO use 0 as an invalid value, so we
 * want to adjust the hash table index by a fixed amount.  Using a large
 * value helps ensure that people don't mix & match arguments, e.g. to
 * findEntryByIndex().
 */
91#define kZipEntryAdj        10000
92
93ZipFileRO::~ZipFileRO() {
94    free(mHashTable);
95    if (mDirectoryMap)
96        mDirectoryMap->release();
97    if (mFd >= 0)
98        TEMP_FAILURE_RETRY(close(mFd));
99    if (mFileName)
100        free(mFileName);
101}
102
103/*
104 * Convert a ZipEntryRO to a hash table index, verifying that it's in a
105 * valid range.
106 */
107int ZipFileRO::entryToIndex(const ZipEntryRO entry) const
108{
109    long ent = ((intptr_t) entry) - kZipEntryAdj;
110    if (ent < 0 || ent >= mHashTableSize || mHashTable[ent].name == NULL) {
111        ALOGW("Invalid ZipEntryRO %p (%ld)\n", entry, ent);
112        return -1;
113    }
114    return ent;
115}
116
117
118/*
119 * Open the specified file read-only.  We memory-map the entire thing and
120 * close the file before returning.
121 */
122status_t ZipFileRO::open(const char* zipFileName)
123{
124    int fd = -1;
125
126    assert(mDirectoryMap == NULL);
127
128    /*
129     * Open and map the specified file.
130     */
131    fd = TEMP_FAILURE_RETRY(::open(zipFileName, O_RDONLY | O_BINARY));
132    if (fd < 0) {
133        ALOGW("Unable to open zip '%s': %s\n", zipFileName, strerror(errno));
134        return NAME_NOT_FOUND;
135    }
136
137    mFileLength = lseek64(fd, 0, SEEK_END);
138    if (mFileLength < kEOCDLen) {
139        TEMP_FAILURE_RETRY(close(fd));
140        return UNKNOWN_ERROR;
141    }
142
143    if (mFileName != NULL) {
144        free(mFileName);
145    }
146    mFileName = strdup(zipFileName);
147
148    mFd = fd;
149
150    /*
151     * Find the Central Directory and store its size and number of entries.
152     */
153    if (!mapCentralDirectory()) {
154        goto bail;
155    }
156
157    /*
158     * Verify Central Directory and create data structures for fast access.
159     */
160    if (!parseZipArchive()) {
161        goto bail;
162    }
163
164    return OK;
165
166bail:
167    free(mFileName);
168    mFileName = NULL;
169    TEMP_FAILURE_RETRY(close(fd));
170    return UNKNOWN_ERROR;
171}
172
173/*
174 * Parse the Zip archive, verifying its contents and initializing internal
175 * data structures.
176 */
177bool ZipFileRO::mapCentralDirectory(void)
178{
179    ssize_t readAmount = kMaxEOCDSearch;
180    if (readAmount > (ssize_t) mFileLength)
181        readAmount = mFileLength;
182
183    if (readAmount < kEOCDSize) {
184        ALOGW("File too short to be a zip file");
185        return false;
186    }
187
188    unsigned char* scanBuf = (unsigned char*) malloc(readAmount);
189    if (scanBuf == NULL) {
190        ALOGW("couldn't allocate scanBuf: %s", strerror(errno));
191        free(scanBuf);
192        return false;
193    }
194
195    /*
196     * Make sure this is a Zip archive.
197     */
198    if (lseek64(mFd, 0, SEEK_SET) != 0) {
199        ALOGW("seek to start failed: %s", strerror(errno));
200        free(scanBuf);
201        return false;
202    }
203
204    ssize_t actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, sizeof(int32_t)));
205    if (actual != (ssize_t) sizeof(int32_t)) {
206        ALOGI("couldn't read first signature from zip archive: %s", strerror(errno));
207        free(scanBuf);
208        return false;
209    }
210
211    unsigned int header = get4LE(scanBuf);
212    if (header != kLFHSignature) {
213        ALOGV("Not a Zip archive (found 0x%08x)\n", header);
214        free(scanBuf);
215        return false;
216    }
217
218    /*
219     * Perform the traditional EOCD snipe hunt.
220     *
221     * We're searching for the End of Central Directory magic number,
222     * which appears at the start of the EOCD block.  It's followed by
223     * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
224     * need to read the last part of the file into a buffer, dig through
225     * it to find the magic number, parse some values out, and use those
226     * to determine the extent of the CD.
227     *
228     * We start by pulling in the last part of the file.
229     */
230    off64_t searchStart = mFileLength - readAmount;
231
232    if (lseek64(mFd, searchStart, SEEK_SET) != searchStart) {
233        ALOGW("seek %ld failed: %s\n",  (long) searchStart, strerror(errno));
234        free(scanBuf);
235        return false;
236    }
237    actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, readAmount));
238    if (actual != (ssize_t) readAmount) {
239        ALOGW("Zip: read " ZD ", expected " ZD ". Failed: %s\n",
240            (ZD_TYPE) actual, (ZD_TYPE) readAmount, strerror(errno));
241        free(scanBuf);
242        return false;
243    }
244
245    /*
246     * Scan backward for the EOCD magic.  In an archive without a trailing
247     * comment, we'll find it on the first try.  (We may want to consider
248     * doing an initial minimal read; if we don't find it, retry with a
249     * second read as above.)
250     */
251    int i;
252    for (i = readAmount - kEOCDLen; i >= 0; i--) {
253        if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) {
254            ALOGV("+++ Found EOCD at buf+%d\n", i);
255            break;
256        }
257    }
258    if (i < 0) {
259        ALOGD("Zip: EOCD not found, %s is not zip\n", mFileName);
260        free(scanBuf);
261        return false;
262    }
263
264    off64_t eocdOffset = searchStart + i;
265    const unsigned char* eocdPtr = scanBuf + i;
266
267    assert(eocdOffset < mFileLength);
268
269    /*
270     * Grab the CD offset and size, and the number of entries in the
271     * archive. After that, we can release our EOCD hunt buffer.
272     */
273    unsigned int diskNumber = get2LE(eocdPtr + kEOCDDiskNumber);
274    unsigned int diskWithCentralDir = get2LE(eocdPtr + kEOCDDiskNumberForCD);
275    unsigned int numEntries = get2LE(eocdPtr + kEOCDNumEntries);
276    unsigned int totalNumEntries = get2LE(eocdPtr + kEOCDTotalNumEntries);
277    unsigned int centralDirSize = get4LE(eocdPtr + kEOCDSize);
278    unsigned int centralDirOffset = get4LE(eocdPtr + kEOCDFileOffset);
279    unsigned int commentSize = get2LE(eocdPtr + kEOCDCommentSize);
280    free(scanBuf);
281
282    // Verify that they look reasonable.
283    if ((long long) centralDirOffset + (long long) centralDirSize > (long long) eocdOffset) {
284        ALOGW("bad offsets (dir %ld, size %u, eocd %ld)\n",
285            (long) centralDirOffset, centralDirSize, (long) eocdOffset);
286        return false;
287    }
288    if (numEntries == 0) {
289        ALOGW("empty archive?\n");
290        return false;
291    } else if (numEntries != totalNumEntries || diskNumber != 0 || diskWithCentralDir != 0) {
292        ALOGW("spanned archives not supported");
293        return false;
294    }
295
296    // Check to see if comment is a sane size
297    if ((commentSize > (mFileLength - kEOCDLen))
298            || (eocdOffset > (mFileLength - kEOCDLen) - commentSize)) {
299        ALOGW("comment size runs off end of file");
300        return false;
301    }
302
303    ALOGV("+++ numEntries=%d dirSize=%d dirOffset=%d\n",
304        numEntries, centralDirSize, centralDirOffset);
305
306    mDirectoryMap = new FileMap();
307    if (mDirectoryMap == NULL) {
308        ALOGW("Unable to create directory map: %s", strerror(errno));
309        return false;
310    }
311
312    if (!mDirectoryMap->create(mFileName, mFd, centralDirOffset, centralDirSize, true)) {
313        ALOGW("Unable to map '%s' (" ZD " to " ZD "): %s\n", mFileName,
314                (ZD_TYPE) centralDirOffset, (ZD_TYPE) (centralDirOffset + centralDirSize), strerror(errno));
315        return false;
316    }
317
318    mNumEntries = numEntries;
319    mDirectoryOffset = centralDirOffset;
320
321    return true;
322}
323
324
/*
 * Return the smallest power of 2 that is >= val.
 *
 * Works by copying the highest set bit of (val - 1) into every lower bit
 * position and then adding one.  With a 32-bit unsigned int, an input of 0
 * or anything above 2^31 wraps around to 0.
 *
 * Technique from http://graphics.stanford.edu/~seander/bithacks.html.
 */
static unsigned int roundUpPower2(unsigned int val)
{
    unsigned int bits = val - 1;

    /* shifts of 1, 2, 4, 8 and 16 cover all 32 bit positions */
    for (unsigned int shift = 1; shift <= 16; shift *= 2) {
        bits |= bits >> shift;
    }

    return bits + 1;
}
342
343bool ZipFileRO::parseZipArchive(void)
344{
345    bool result = false;
346    const unsigned char* cdPtr = (const unsigned char*) mDirectoryMap->getDataPtr();
347    size_t cdLength = mDirectoryMap->getDataLength();
348    int numEntries = mNumEntries;
349
350    /*
351     * Create hash table.  We have a minimum 75% load factor, possibly as
352     * low as 50% after we round off to a power of 2.
353     */
354    mHashTableSize = roundUpPower2(1 + (numEntries * 4) / 3);
355    mHashTable = (HashEntry*) calloc(mHashTableSize, sizeof(HashEntry));
356
357    /*
358     * Walk through the central directory, adding entries to the hash
359     * table.
360     */
361    const unsigned char* ptr = cdPtr;
362    for (int i = 0; i < numEntries; i++) {
363        if (get4LE(ptr) != kCDESignature) {
364            ALOGW("Missed a central dir sig (at %d)\n", i);
365            goto bail;
366        }
367        if (ptr + kCDELen > cdPtr + cdLength) {
368            ALOGW("Ran off the end (at %d)\n", i);
369            goto bail;
370        }
371
372        long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
373        if (localHdrOffset >= mDirectoryOffset) {
374            ALOGW("bad LFH offset %ld at entry %d\n", localHdrOffset, i);
375            goto bail;
376        }
377
378        unsigned int gpbf = get2LE(ptr + kCDEGPBFlags);
379        if ((gpbf & kGPFUnsupportedMask) != 0) {
380            ALOGW("Invalid General Purpose Bit Flag: %d", gpbf);
381            goto bail;
382        }
383
384        unsigned int nameLen = get2LE(ptr + kCDENameLen);
385        unsigned int extraLen = get2LE(ptr + kCDEExtraLen);
386        unsigned int commentLen = get2LE(ptr + kCDECommentLen);
387
388        const char *name = (const char *) ptr + kCDELen;
389
390        /* Check name for NULL characters */
391        if (memchr(name, 0, nameLen) != NULL) {
392            ALOGW("Filename contains NUL byte");
393            goto bail;
394        }
395
396        /* add the CDE filename to the hash table */
397        unsigned int hash = computeHash(name, nameLen);
398        addToHash(name, nameLen, hash);
399
400        /* We don't care about the comment or extra data. */
401        ptr += kCDELen + nameLen + extraLen + commentLen;
402        if ((size_t)(ptr - cdPtr) > cdLength) {
403            ALOGW("bad CD advance (%d vs " ZD ") at entry %d\n",
404                (int) (ptr - cdPtr), (ZD_TYPE) cdLength, i);
405            goto bail;
406        }
407    }
408    ALOGV("+++ zip good scan %d entries\n", numEntries);
409    result = true;
410
411bail:
412    return result;
413}
414
415/*
416 * Simple string hash function for non-null-terminated strings.
417 */
418/*static*/ unsigned int ZipFileRO::computeHash(const char* str, int len)
419{
420    unsigned int hash = 0;
421
422    while (len--)
423        hash = hash * 31 + *str++;
424
425    return hash;
426}
427
428/*
429 * Add a new entry to the hash table.
430 */
431void ZipFileRO::addToHash(const char* str, int strLen, unsigned int hash)
432{
433    int ent = hash & (mHashTableSize-1);
434
435    /*
436     * We over-allocate the table, so we're guaranteed to find an empty slot.
437     */
438    while (mHashTable[ent].name != NULL)
439        ent = (ent + 1) & (mHashTableSize-1);
440
441    mHashTable[ent].name = str;
442    mHashTable[ent].nameLen = strLen;
443}
444
445/*
446 * Find a matching entry.
447 *
448 * Returns NULL if not found.
449 */
450ZipEntryRO ZipFileRO::findEntryByName(const char* fileName) const
451{
452    /*
453     * If the ZipFileRO instance is not initialized, the entry number will
454     * end up being garbage since mHashTableSize is -1.
455     */
456    if (mHashTableSize <= 0) {
457        return NULL;
458    }
459
460    int nameLen = strlen(fileName);
461    unsigned int hash = computeHash(fileName, nameLen);
462    int ent = hash & (mHashTableSize-1);
463
464    while (mHashTable[ent].name != NULL) {
465        if (mHashTable[ent].nameLen == nameLen &&
466            memcmp(mHashTable[ent].name, fileName, nameLen) == 0)
467        {
468            /* match */
469            return (ZipEntryRO)(long)(ent + kZipEntryAdj);
470        }
471
472        ent = (ent + 1) & (mHashTableSize-1);
473    }
474
475    return NULL;
476}
477
478/*
479 * Find the Nth entry.
480 *
481 * This currently involves walking through the sparse hash table, counting
482 * non-empty entries.  If we need to speed this up we can either allocate
483 * a parallel lookup table or (perhaps better) provide an iterator interface.
484 */
485ZipEntryRO ZipFileRO::findEntryByIndex(int idx) const
486{
487    if (idx < 0 || idx >= mNumEntries) {
488        ALOGW("Invalid index %d\n", idx);
489        return NULL;
490    }
491
492    for (int ent = 0; ent < mHashTableSize; ent++) {
493        if (mHashTable[ent].name != NULL) {
494            if (idx-- == 0)
495                return (ZipEntryRO) (intptr_t)(ent + kZipEntryAdj);
496        }
497    }
498
499    return NULL;
500}
501
502/*
503 * Get the useful fields from the zip entry.
504 *
505 * Returns "false" if the offsets to the fields or the contents of the fields
506 * appear to be bogus.
507 */
508bool ZipFileRO::getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
509    size_t* pCompLen, off64_t* pOffset, long* pModWhen, long* pCrc32) const
510{
511    bool ret = false;
512
513    const int ent = entryToIndex(entry);
514    if (ent < 0) {
515        ALOGW("cannot find entry");
516        return false;
517    }
518
519    HashEntry hashEntry = mHashTable[ent];
520
521    /*
522     * Recover the start of the central directory entry from the filename
523     * pointer.  The filename is the first entry past the fixed-size data,
524     * so we can just subtract back from that.
525     */
526    const unsigned char* ptr = (const unsigned char*) hashEntry.name;
527    off64_t cdOffset = mDirectoryOffset;
528
529    ptr -= kCDELen;
530
531    int method = get2LE(ptr + kCDEMethod);
532    if (pMethod != NULL)
533        *pMethod = method;
534
535    if (pModWhen != NULL)
536        *pModWhen = get4LE(ptr + kCDEModWhen);
537    if (pCrc32 != NULL)
538        *pCrc32 = get4LE(ptr + kCDECRC);
539
540    size_t compLen = get4LE(ptr + kCDECompLen);
541    if (pCompLen != NULL)
542        *pCompLen = compLen;
543    size_t uncompLen = get4LE(ptr + kCDEUncompLen);
544    if (pUncompLen != NULL)
545        *pUncompLen = uncompLen;
546
547    /*
548     * If requested, determine the offset of the start of the data.  All we
549     * have is the offset to the Local File Header, which is variable size,
550     * so we have to read the contents of the struct to figure out where
551     * the actual data starts.
552     *
553     * We also need to make sure that the lengths are not so large that
554     * somebody trying to map the compressed or uncompressed data runs
555     * off the end of the mapped region.
556     *
557     * Note we don't verify compLen/uncompLen if they don't request the
558     * dataOffset, because dataOffset is expensive to determine.  However,
559     * if they don't have the file offset, they're not likely to be doing
560     * anything with the contents.
561     */
562    if (pOffset != NULL) {
563        long localHdrOffset = get4LE(ptr + kCDELocalOffset);
564        if (localHdrOffset + kLFHLen >= cdOffset) {
565            ALOGE("ERROR: bad local hdr offset in zip\n");
566            return false;
567        }
568
569        unsigned char lfhBuf[kLFHLen];
570
571#ifdef HAVE_PREAD
572        /*
573         * This file descriptor might be from zygote's preloaded assets,
574         * so we need to do an pread64() instead of a lseek64() + read() to
575         * guarantee atomicity across the processes with the shared file
576         * descriptors.
577         */
578        ssize_t actual =
579                TEMP_FAILURE_RETRY(pread64(mFd, lfhBuf, sizeof(lfhBuf), localHdrOffset));
580
581        if (actual != sizeof(lfhBuf)) {
582            ALOGW("failed reading lfh from offset %ld\n", localHdrOffset);
583            return false;
584        }
585
586        if (get4LE(lfhBuf) != kLFHSignature) {
587            ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; "
588                    "got: data=0x%08lx\n",
589                    localHdrOffset, kLFHSignature, get4LE(lfhBuf));
590            return false;
591        }
592#else /* HAVE_PREAD */
593        /*
594         * For hosts don't have pread64() we cannot guarantee atomic reads from
595         * an offset in a file. Android should never run on those platforms.
596         * File descriptors inherited from a fork() share file offsets and
597         * there would be nothing to protect from two different processes
598         * calling lseek64() concurrently.
599         */
600
601        {
602            AutoMutex _l(mFdLock);
603
604            if (lseek64(mFd, localHdrOffset, SEEK_SET) != localHdrOffset) {
605                ALOGW("failed seeking to lfh at offset %ld\n", localHdrOffset);
606                return false;
607            }
608
609            ssize_t actual =
610                    TEMP_FAILURE_RETRY(read(mFd, lfhBuf, sizeof(lfhBuf)));
611            if (actual != sizeof(lfhBuf)) {
612                ALOGW("failed reading lfh from offset %ld\n", localHdrOffset);
613                return false;
614            }
615
616            if (get4LE(lfhBuf) != kLFHSignature) {
617                off64_t actualOffset = lseek64(mFd, 0, SEEK_CUR);
618                ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; "
619                        "got: offset=" ZD " data=0x%08lx\n",
620                        localHdrOffset, kLFHSignature, (ZD_TYPE) actualOffset, get4LE(lfhBuf));
621                return false;
622            }
623        }
624#endif /* HAVE_PREAD */
625
626        unsigned int gpbf = get2LE(lfhBuf + kLFHGPBFlags);
627        if ((gpbf & kGPFUnsupportedMask) != 0) {
628            ALOGW("Invalid General Purpose Bit Flag: %d", gpbf);
629            return false;
630        }
631
632        off64_t dataOffset = localHdrOffset + kLFHLen
633            + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen);
634        if (dataOffset >= cdOffset) {
635            ALOGW("bad data offset %ld in zip\n", (long) dataOffset);
636            return false;
637        }
638
639        /* check lengths */
640        if ((dataOffset >= cdOffset) || (compLen > (cdOffset - dataOffset))) {
641            ALOGW("bad compressed length in zip (%ld + " ZD " > %ld)\n",
642                (long) dataOffset, (ZD_TYPE) compLen, (long) cdOffset);
643            return false;
644        }
645
646        if (method == kCompressStored &&
647            ((dataOffset >= cdOffset) ||
648             (uncompLen > (cdOffset - dataOffset))))
649        {
650            ALOGE("ERROR: bad uncompressed length in zip (%ld + " ZD " > %ld)\n",
651                (long) dataOffset, (ZD_TYPE) uncompLen, (long) cdOffset);
652            return false;
653        }
654
655        *pOffset = dataOffset;
656    }
657
658    return true;
659}
660
661/*
662 * Copy the entry's filename to the buffer.
663 */
664int ZipFileRO::getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen)
665    const
666{
667    int ent = entryToIndex(entry);
668    if (ent < 0)
669        return -1;
670
671    int nameLen = mHashTable[ent].nameLen;
672    if (bufLen < nameLen+1)
673        return nameLen+1;
674
675    memcpy(buffer, mHashTable[ent].name, nameLen);
676    buffer[nameLen] = '\0';
677    return 0;
678}
679
680/*
681 * Create a new FileMap object that spans the data in "entry".
682 */
683FileMap* ZipFileRO::createEntryFileMap(ZipEntryRO entry) const
684{
685    /*
686     * TODO: the efficient way to do this is to modify FileMap to allow
687     * sub-regions of a file to be mapped.  A reference-counting scheme
688     * can manage the base memory mapping.  For now, we just create a brand
689     * new mapping off of the Zip archive file descriptor.
690     */
691
692    FileMap* newMap;
693    int method;
694    size_t uncompLen;
695    size_t compLen;
696    off64_t offset;
697
698    if (!getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL)) {
699        return NULL;
700    }
701
702    size_t actualLen;
703    if (method == kCompressStored) {
704        actualLen = uncompLen;
705    } else {
706        actualLen = compLen;
707    }
708
709    newMap = new FileMap();
710    if (!newMap->create(mFileName, mFd, offset, actualLen, true)) {
711        newMap->release();
712        return NULL;
713    }
714
715    return newMap;
716}
717
718/*
719 * Uncompress an entry, in its entirety, into the provided output buffer.
720 *
721 * This doesn't verify the data's CRC, which might be useful for
722 * uncompressed data.  The caller should be able to manage it.
723 */
724bool ZipFileRO::uncompressEntry(ZipEntryRO entry, void* buffer) const
725{
726    const size_t kSequentialMin = 32768;
727    bool result = false;
728    int ent = entryToIndex(entry);
729    if (ent < 0) {
730        return false;
731    }
732
733    int method;
734    size_t uncompLen, compLen;
735    off64_t offset;
736    const unsigned char* ptr;
737    FileMap *file;
738
739    if (!getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL)) {
740        goto bail;
741    }
742
743    file = createEntryFileMap(entry);
744    if (file == NULL) {
745        goto bail;
746    }
747
748    ptr = (const unsigned char*) file->getDataPtr();
749
750    /*
751     * Experiment with madvise hint.  When we want to uncompress a file,
752     * we pull some stuff out of the central dir entry and then hit a
753     * bunch of compressed or uncompressed data sequentially.  The CDE
754     * visit will cause a limited amount of read-ahead because it's at
755     * the end of the file.  We could end up doing lots of extra disk
756     * access if the file we're prying open is small.  Bottom line is we
757     * probably don't want to turn MADV_SEQUENTIAL on and leave it on.
758     *
759     * So, if the compressed size of the file is above a certain minimum
760     * size, temporarily boost the read-ahead in the hope that the extra
761     * pair of system calls are negated by a reduction in page faults.
762     */
763    if (compLen > kSequentialMin)
764        file->advise(FileMap::SEQUENTIAL);
765
766    if (method == kCompressStored) {
767        memcpy(buffer, ptr, uncompLen);
768    } else {
769        if (!inflateBuffer(buffer, ptr, uncompLen, compLen))
770            goto unmap;
771    }
772
773    if (compLen > kSequentialMin)
774        file->advise(FileMap::NORMAL);
775
776    result = true;
777
778unmap:
779    file->release();
780bail:
781    return result;
782}
783
784/*
785 * Uncompress an entry, in its entirety, to an open file descriptor.
786 *
787 * This doesn't verify the data's CRC, but probably should.
788 */
789bool ZipFileRO::uncompressEntry(ZipEntryRO entry, int fd) const
790{
791    bool result = false;
792    int ent = entryToIndex(entry);
793    if (ent < 0) {
794        return false;
795    }
796
797    int method;
798    size_t uncompLen, compLen;
799    off64_t offset;
800    const unsigned char* ptr;
801    FileMap *file;
802
803    if (!getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL)) {
804        goto bail;
805    }
806
807    file = createEntryFileMap(entry);
808    if (file == NULL) {
809        goto bail;
810    }
811
812    ptr = (const unsigned char*) file->getDataPtr();
813
814    if (method == kCompressStored) {
815        ssize_t actual = TEMP_FAILURE_RETRY(write(fd, ptr, uncompLen));
816        if (actual < 0) {
817            ALOGE("Write failed: %s\n", strerror(errno));
818            goto unmap;
819        } else if ((size_t) actual != uncompLen) {
820            ALOGE("Partial write during uncompress (" ZD " of " ZD ")\n",
821                (ZD_TYPE) actual, (ZD_TYPE) uncompLen);
822            goto unmap;
823        } else {
824            ALOGI("+++ successful write\n");
825        }
826    } else {
827        if (!inflateBuffer(fd, ptr, uncompLen, compLen)) {
828            goto unmap;
829        }
830    }
831
832    result = true;
833
834unmap:
835    file->release();
836bail:
837    return result;
838}
839
840/*
841 * Uncompress "deflate" data from one buffer to another.
842 */
843/*static*/ bool ZipFileRO::inflateBuffer(void* outBuf, const void* inBuf,
844    size_t uncompLen, size_t compLen)
845{
846    bool result = false;
847    z_stream zstream;
848    int zerr;
849
850    /*
851     * Initialize the zlib stream struct.
852     */
853    memset(&zstream, 0, sizeof(zstream));
854    zstream.zalloc = Z_NULL;
855    zstream.zfree = Z_NULL;
856    zstream.opaque = Z_NULL;
857    zstream.next_in = (Bytef*)inBuf;
858    zstream.avail_in = compLen;
859    zstream.next_out = (Bytef*) outBuf;
860    zstream.avail_out = uncompLen;
861    zstream.data_type = Z_UNKNOWN;
862
863    /*
864     * Use the undocumented "negative window bits" feature to tell zlib
865     * that there's no zlib header waiting for it.
866     */
867    zerr = inflateInit2(&zstream, -MAX_WBITS);
868    if (zerr != Z_OK) {
869        if (zerr == Z_VERSION_ERROR) {
870            ALOGE("Installed zlib is not compatible with linked version (%s)\n",
871                ZLIB_VERSION);
872        } else {
873            ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
874        }
875        goto bail;
876    }
877
878    /*
879     * Expand data.
880     */
881    zerr = inflate(&zstream, Z_FINISH);
882    if (zerr != Z_STREAM_END) {
883        ALOGW("Zip inflate failed, zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
884            zerr, zstream.next_in, zstream.avail_in,
885            zstream.next_out, zstream.avail_out);
886        goto z_bail;
887    }
888
889    /* paranoia */
890    if (zstream.total_out != uncompLen) {
891        ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n",
892            zstream.total_out, (ZD_TYPE) uncompLen);
893        goto z_bail;
894    }
895
896    result = true;
897
898z_bail:
899    inflateEnd(&zstream);        /* free up any allocated structures */
900
901bail:
902    return result;
903}
904
905/*
906 * Uncompress "deflate" data from one buffer to an open file descriptor.
907 */
908/*static*/ bool ZipFileRO::inflateBuffer(int fd, const void* inBuf,
909    size_t uncompLen, size_t compLen)
910{
911    bool result = false;
912    const size_t kWriteBufSize = 32768;
913    unsigned char writeBuf[kWriteBufSize];
914    z_stream zstream;
915    int zerr;
916
917    /*
918     * Initialize the zlib stream struct.
919     */
920    memset(&zstream, 0, sizeof(zstream));
921    zstream.zalloc = Z_NULL;
922    zstream.zfree = Z_NULL;
923    zstream.opaque = Z_NULL;
924    zstream.next_in = (Bytef*)inBuf;
925    zstream.avail_in = compLen;
926    zstream.next_out = (Bytef*) writeBuf;
927    zstream.avail_out = sizeof(writeBuf);
928    zstream.data_type = Z_UNKNOWN;
929
930    /*
931     * Use the undocumented "negative window bits" feature to tell zlib
932     * that there's no zlib header waiting for it.
933     */
934    zerr = inflateInit2(&zstream, -MAX_WBITS);
935    if (zerr != Z_OK) {
936        if (zerr == Z_VERSION_ERROR) {
937            ALOGE("Installed zlib is not compatible with linked version (%s)\n",
938                ZLIB_VERSION);
939        } else {
940            ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
941        }
942        goto bail;
943    }
944
945    /*
946     * Loop while we have more to do.
947     */
948    do {
949        /*
950         * Expand data.
951         */
952        zerr = inflate(&zstream, Z_NO_FLUSH);
953        if (zerr != Z_OK && zerr != Z_STREAM_END) {
954            ALOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
955                zerr, zstream.next_in, zstream.avail_in,
956                zstream.next_out, zstream.avail_out);
957            goto z_bail;
958        }
959
960        /* write when we're full or when we're done */
961        if (zstream.avail_out == 0 ||
962            (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
963        {
964            long writeSize = zstream.next_out - writeBuf;
965            int cc = TEMP_FAILURE_RETRY(write(fd, writeBuf, writeSize));
966            if (cc < 0) {
967                ALOGW("write failed in inflate: %s", strerror(errno));
968                goto z_bail;
969            } else if (cc != (int) writeSize) {
970                ALOGW("write failed in inflate (%d vs %ld)", cc, writeSize);
971                goto z_bail;
972            }
973
974            zstream.next_out = writeBuf;
975            zstream.avail_out = sizeof(writeBuf);
976        }
977    } while (zerr == Z_OK);
978
979    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
980
981    /* paranoia */
982    if (zstream.total_out != uncompLen) {
983        ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n",
984            zstream.total_out, (ZD_TYPE) uncompLen);
985        goto z_bail;
986    }
987
988    result = true;
989
990z_bail:
991    inflateEnd(&zstream);        /* free up any allocated structures */
992
993bail:
994    return result;
995}
996