1/*
2 * Copyright (C) 2007 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//
18// Read-only access to Zip archives, with minimal heap allocation.
19//
20#define LOG_TAG "zipro"
21//#define LOG_NDEBUG 0
22#include <utils/Log.h>
23#include <utils/ZipFileRO.h>
24#include <utils/misc.h>
25#include <utils/threads.h>
26
27#include <zlib.h>
28
29#include <string.h>
30#include <fcntl.h>
31#include <errno.h>
32#include <assert.h>
33#include <unistd.h>
34
35#if HAVE_PRINTF_ZD
36#  define ZD "%zd"
37#  define ZD_TYPE ssize_t
38#else
39#  define ZD "%ld"
40#  define ZD_TYPE long
41#endif
42
43/*
44 * We must open binary files using open(path, ... | O_BINARY) under Windows.
45 * Otherwise strange read errors will happen.
46 */
47#ifndef O_BINARY
48#  define O_BINARY  0
49#endif
50
51/*
52 * TEMP_FAILURE_RETRY is defined by some, but not all, versions of
53 * <unistd.h>. (Alas, it is not as standard as we'd hoped!) So, if it's
54 * not already defined, then define it here.
55 */
56#ifndef TEMP_FAILURE_RETRY
57/* Used to retry syscalls that can return EINTR. */
58#define TEMP_FAILURE_RETRY(exp) ({         \
59    typeof (exp) _rc;                      \
60    do {                                   \
61        _rc = (exp);                       \
62    } while (_rc == -1 && errno == EINTR); \
63    _rc; })
64#endif
65
66using namespace android;
67
68/*
69 * Zip file constants.
70 */
71#define kEOCDSignature      0x06054b50
72#define kEOCDLen            22
73#define kEOCDNumEntries     8               // offset to #of entries in file
74#define kEOCDSize           12              // size of the central directory
75#define kEOCDFileOffset     16              // offset to central directory
76
77#define kMaxCommentLen      65535           // longest possible in ushort
78#define kMaxEOCDSearch      (kMaxCommentLen + kEOCDLen)
79
80#define kLFHSignature       0x04034b50
81#define kLFHLen             30              // excluding variable-len fields
82#define kLFHNameLen         26              // offset to filename length
83#define kLFHExtraLen        28              // offset to extra length
84
85#define kCDESignature       0x02014b50
86#define kCDELen             46              // excluding variable-len fields
87#define kCDEMethod          10              // offset to compression method
88#define kCDEModWhen         12              // offset to modification timestamp
89#define kCDECRC             16              // offset to entry CRC
90#define kCDECompLen         20              // offset to compressed length
91#define kCDEUncompLen       24              // offset to uncompressed length
92#define kCDENameLen         28              // offset to filename length
93#define kCDEExtraLen        30              // offset to extra length
94#define kCDECommentLen      32              // offset to comment length
95#define kCDELocalOffset     42              // offset to local hdr
96
/*
 * The values we return for ZipEntryRO use 0 as an invalid value, so we
 * want to adjust the hash table index by a fixed amount.  Using a large
 * value helps ensure that people don't mix & match arguments, e.g. to
 * findEntryByIndex().
 */
103#define kZipEntryAdj        10000
104
105ZipFileRO::~ZipFileRO() {
106    free(mHashTable);
107    if (mDirectoryMap)
108        mDirectoryMap->release();
109    if (mFd >= 0)
110        TEMP_FAILURE_RETRY(close(mFd));
111    if (mFileName)
112        free(mFileName);
113}
114
115/*
116 * Convert a ZipEntryRO to a hash table index, verifying that it's in a
117 * valid range.
118 */
119int ZipFileRO::entryToIndex(const ZipEntryRO entry) const
120{
121    long ent = ((intptr_t) entry) - kZipEntryAdj;
122    if (ent < 0 || ent >= mHashTableSize || mHashTable[ent].name == NULL) {
123        ALOGW("Invalid ZipEntryRO %p (%ld)\n", entry, ent);
124        return -1;
125    }
126    return ent;
127}
128
129
130/*
131 * Open the specified file read-only.  We memory-map the entire thing and
132 * close the file before returning.
133 */
134status_t ZipFileRO::open(const char* zipFileName)
135{
136    int fd = -1;
137
138    assert(mDirectoryMap == NULL);
139
140    /*
141     * Open and map the specified file.
142     */
143    fd = TEMP_FAILURE_RETRY(::open(zipFileName, O_RDONLY | O_BINARY));
144    if (fd < 0) {
145        ALOGW("Unable to open zip '%s': %s\n", zipFileName, strerror(errno));
146        return NAME_NOT_FOUND;
147    }
148
149    mFileLength = lseek64(fd, 0, SEEK_END);
150    if (mFileLength < kEOCDLen) {
151        TEMP_FAILURE_RETRY(close(fd));
152        return UNKNOWN_ERROR;
153    }
154
155    if (mFileName != NULL) {
156        free(mFileName);
157    }
158    mFileName = strdup(zipFileName);
159
160    mFd = fd;
161
162    /*
163     * Find the Central Directory and store its size and number of entries.
164     */
165    if (!mapCentralDirectory()) {
166        goto bail;
167    }
168
169    /*
170     * Verify Central Directory and create data structures for fast access.
171     */
172    if (!parseZipArchive()) {
173        goto bail;
174    }
175
176    return OK;
177
178bail:
179    free(mFileName);
180    mFileName = NULL;
181    TEMP_FAILURE_RETRY(close(fd));
182    return UNKNOWN_ERROR;
183}
184
185/*
186 * Parse the Zip archive, verifying its contents and initializing internal
187 * data structures.
188 */
189bool ZipFileRO::mapCentralDirectory(void)
190{
191    ssize_t readAmount = kMaxEOCDSearch;
192    if (readAmount > (ssize_t) mFileLength)
193        readAmount = mFileLength;
194
195    unsigned char* scanBuf = (unsigned char*) malloc(readAmount);
196    if (scanBuf == NULL) {
197        ALOGW("couldn't allocate scanBuf: %s", strerror(errno));
198        free(scanBuf);
199        return false;
200    }
201
202    /*
203     * Make sure this is a Zip archive.
204     */
205    if (lseek64(mFd, 0, SEEK_SET) != 0) {
206        ALOGW("seek to start failed: %s", strerror(errno));
207        free(scanBuf);
208        return false;
209    }
210
211    ssize_t actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, sizeof(int32_t)));
212    if (actual != (ssize_t) sizeof(int32_t)) {
213        ALOGI("couldn't read first signature from zip archive: %s", strerror(errno));
214        free(scanBuf);
215        return false;
216    }
217
218    {
219        unsigned int header = get4LE(scanBuf);
220        if (header == kEOCDSignature) {
221            ALOGI("Found Zip archive, but it looks empty\n");
222            free(scanBuf);
223            return false;
224        } else if (header != kLFHSignature) {
225            ALOGV("Not a Zip archive (found 0x%08x)\n", header);
226            free(scanBuf);
227            return false;
228        }
229    }
230
231    /*
232     * Perform the traditional EOCD snipe hunt.
233     *
234     * We're searching for the End of Central Directory magic number,
235     * which appears at the start of the EOCD block.  It's followed by
236     * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
237     * need to read the last part of the file into a buffer, dig through
238     * it to find the magic number, parse some values out, and use those
239     * to determine the extent of the CD.
240     *
241     * We start by pulling in the last part of the file.
242     */
243    off64_t searchStart = mFileLength - readAmount;
244
245    if (lseek64(mFd, searchStart, SEEK_SET) != searchStart) {
246        ALOGW("seek %ld failed: %s\n",  (long) searchStart, strerror(errno));
247        free(scanBuf);
248        return false;
249    }
250    actual = TEMP_FAILURE_RETRY(read(mFd, scanBuf, readAmount));
251    if (actual != (ssize_t) readAmount) {
252        ALOGW("Zip: read " ZD ", expected " ZD ". Failed: %s\n",
253            (ZD_TYPE) actual, (ZD_TYPE) readAmount, strerror(errno));
254        free(scanBuf);
255        return false;
256    }
257
258    /*
259     * Scan backward for the EOCD magic.  In an archive without a trailing
260     * comment, we'll find it on the first try.  (We may want to consider
261     * doing an initial minimal read; if we don't find it, retry with a
262     * second read as above.)
263     */
264    int i;
265    for (i = readAmount - kEOCDLen; i >= 0; i--) {
266        if (scanBuf[i] == 0x50 && get4LE(&scanBuf[i]) == kEOCDSignature) {
267            ALOGV("+++ Found EOCD at buf+%d\n", i);
268            break;
269        }
270    }
271    if (i < 0) {
272        ALOGD("Zip: EOCD not found, %s is not zip\n", mFileName);
273        free(scanBuf);
274        return false;
275    }
276
277    off64_t eocdOffset = searchStart + i;
278    const unsigned char* eocdPtr = scanBuf + i;
279
280    assert(eocdOffset < mFileLength);
281
282    /*
283     * Grab the CD offset and size, and the number of entries in the
284     * archive. After that, we can release our EOCD hunt buffer.
285     */
286    unsigned int numEntries = get2LE(eocdPtr + kEOCDNumEntries);
287    unsigned int dirSize = get4LE(eocdPtr + kEOCDSize);
288    unsigned int dirOffset = get4LE(eocdPtr + kEOCDFileOffset);
289    free(scanBuf);
290
291    // Verify that they look reasonable.
292    if ((long long) dirOffset + (long long) dirSize > (long long) eocdOffset) {
293        ALOGW("bad offsets (dir %ld, size %u, eocd %ld)\n",
294            (long) dirOffset, dirSize, (long) eocdOffset);
295        return false;
296    }
297    if (numEntries == 0) {
298        ALOGW("empty archive?\n");
299        return false;
300    }
301
302    ALOGV("+++ numEntries=%d dirSize=%d dirOffset=%d\n",
303        numEntries, dirSize, dirOffset);
304
305    mDirectoryMap = new FileMap();
306    if (mDirectoryMap == NULL) {
307        ALOGW("Unable to create directory map: %s", strerror(errno));
308        return false;
309    }
310
311    if (!mDirectoryMap->create(mFileName, mFd, dirOffset, dirSize, true)) {
312        ALOGW("Unable to map '%s' (" ZD " to " ZD "): %s\n", mFileName,
313                (ZD_TYPE) dirOffset, (ZD_TYPE) (dirOffset + dirSize), strerror(errno));
314        return false;
315    }
316
317    mNumEntries = numEntries;
318    mDirectoryOffset = dirOffset;
319
320    return true;
321}
322
323
/*
 * Round up to the next highest power of 2 (a value that already is a
 * power of 2 is returned unchanged).
 *
 * Works by smearing the highest set bit of (val - 1) into every lower bit
 * position and then adding one.  The shifts cover 32 bits.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static unsigned int roundUpPower2(unsigned int val)
{
    unsigned int bits = val - 1;

    for (unsigned int shift = 1; shift <= 16; shift *= 2) {
        bits |= bits >> shift;
    }

    return bits + 1;
}
341
342bool ZipFileRO::parseZipArchive(void)
343{
344    bool result = false;
345    const unsigned char* cdPtr = (const unsigned char*) mDirectoryMap->getDataPtr();
346    size_t cdLength = mDirectoryMap->getDataLength();
347    int numEntries = mNumEntries;
348
349    /*
350     * Create hash table.  We have a minimum 75% load factor, possibly as
351     * low as 50% after we round off to a power of 2.
352     */
353    mHashTableSize = roundUpPower2(1 + (numEntries * 4) / 3);
354    mHashTable = (HashEntry*) calloc(mHashTableSize, sizeof(HashEntry));
355
356    /*
357     * Walk through the central directory, adding entries to the hash
358     * table.
359     */
360    const unsigned char* ptr = cdPtr;
361    for (int i = 0; i < numEntries; i++) {
362        if (get4LE(ptr) != kCDESignature) {
363            ALOGW("Missed a central dir sig (at %d)\n", i);
364            goto bail;
365        }
366        if (ptr + kCDELen > cdPtr + cdLength) {
367            ALOGW("Ran off the end (at %d)\n", i);
368            goto bail;
369        }
370
371        long localHdrOffset = (long) get4LE(ptr + kCDELocalOffset);
372        if (localHdrOffset >= mDirectoryOffset) {
373            ALOGW("bad LFH offset %ld at entry %d\n", localHdrOffset, i);
374            goto bail;
375        }
376
377        unsigned int fileNameLen, extraLen, commentLen, hash;
378
379        fileNameLen = get2LE(ptr + kCDENameLen);
380        extraLen = get2LE(ptr + kCDEExtraLen);
381        commentLen = get2LE(ptr + kCDECommentLen);
382
383        /* add the CDE filename to the hash table */
384        hash = computeHash((const char*)ptr + kCDELen, fileNameLen);
385        addToHash((const char*)ptr + kCDELen, fileNameLen, hash);
386
387        ptr += kCDELen + fileNameLen + extraLen + commentLen;
388        if ((size_t)(ptr - cdPtr) > cdLength) {
389            ALOGW("bad CD advance (%d vs " ZD ") at entry %d\n",
390                (int) (ptr - cdPtr), (ZD_TYPE) cdLength, i);
391            goto bail;
392        }
393    }
394    ALOGV("+++ zip good scan %d entries\n", numEntries);
395    result = true;
396
397bail:
398    return result;
399}
400
401/*
402 * Simple string hash function for non-null-terminated strings.
403 */
404/*static*/ unsigned int ZipFileRO::computeHash(const char* str, int len)
405{
406    unsigned int hash = 0;
407
408    while (len--)
409        hash = hash * 31 + *str++;
410
411    return hash;
412}
413
414/*
415 * Add a new entry to the hash table.
416 */
417void ZipFileRO::addToHash(const char* str, int strLen, unsigned int hash)
418{
419    int ent = hash & (mHashTableSize-1);
420
421    /*
422     * We over-allocate the table, so we're guaranteed to find an empty slot.
423     */
424    while (mHashTable[ent].name != NULL)
425        ent = (ent + 1) & (mHashTableSize-1);
426
427    mHashTable[ent].name = str;
428    mHashTable[ent].nameLen = strLen;
429}
430
431/*
432 * Find a matching entry.
433 *
434 * Returns NULL if not found.
435 */
436ZipEntryRO ZipFileRO::findEntryByName(const char* fileName) const
437{
438    /*
439     * If the ZipFileRO instance is not initialized, the entry number will
440     * end up being garbage since mHashTableSize is -1.
441     */
442    if (mHashTableSize <= 0) {
443        return NULL;
444    }
445
446    int nameLen = strlen(fileName);
447    unsigned int hash = computeHash(fileName, nameLen);
448    int ent = hash & (mHashTableSize-1);
449
450    while (mHashTable[ent].name != NULL) {
451        if (mHashTable[ent].nameLen == nameLen &&
452            memcmp(mHashTable[ent].name, fileName, nameLen) == 0)
453        {
454            /* match */
455            return (ZipEntryRO)(long)(ent + kZipEntryAdj);
456        }
457
458        ent = (ent + 1) & (mHashTableSize-1);
459    }
460
461    return NULL;
462}
463
464/*
465 * Find the Nth entry.
466 *
467 * This currently involves walking through the sparse hash table, counting
468 * non-empty entries.  If we need to speed this up we can either allocate
469 * a parallel lookup table or (perhaps better) provide an iterator interface.
470 */
471ZipEntryRO ZipFileRO::findEntryByIndex(int idx) const
472{
473    if (idx < 0 || idx >= mNumEntries) {
474        ALOGW("Invalid index %d\n", idx);
475        return NULL;
476    }
477
478    for (int ent = 0; ent < mHashTableSize; ent++) {
479        if (mHashTable[ent].name != NULL) {
480            if (idx-- == 0)
481                return (ZipEntryRO) (intptr_t)(ent + kZipEntryAdj);
482        }
483    }
484
485    return NULL;
486}
487
488/*
489 * Get the useful fields from the zip entry.
490 *
491 * Returns "false" if the offsets to the fields or the contents of the fields
492 * appear to be bogus.
493 */
494bool ZipFileRO::getEntryInfo(ZipEntryRO entry, int* pMethod, size_t* pUncompLen,
495    size_t* pCompLen, off64_t* pOffset, long* pModWhen, long* pCrc32) const
496{
497    bool ret = false;
498
499    const int ent = entryToIndex(entry);
500    if (ent < 0)
501        return false;
502
503    HashEntry hashEntry = mHashTable[ent];
504
505    /*
506     * Recover the start of the central directory entry from the filename
507     * pointer.  The filename is the first entry past the fixed-size data,
508     * so we can just subtract back from that.
509     */
510    const unsigned char* ptr = (const unsigned char*) hashEntry.name;
511    off64_t cdOffset = mDirectoryOffset;
512
513    ptr -= kCDELen;
514
515    int method = get2LE(ptr + kCDEMethod);
516    if (pMethod != NULL)
517        *pMethod = method;
518
519    if (pModWhen != NULL)
520        *pModWhen = get4LE(ptr + kCDEModWhen);
521    if (pCrc32 != NULL)
522        *pCrc32 = get4LE(ptr + kCDECRC);
523
524    size_t compLen = get4LE(ptr + kCDECompLen);
525    if (pCompLen != NULL)
526        *pCompLen = compLen;
527    size_t uncompLen = get4LE(ptr + kCDEUncompLen);
528    if (pUncompLen != NULL)
529        *pUncompLen = uncompLen;
530
531    /*
532     * If requested, determine the offset of the start of the data.  All we
533     * have is the offset to the Local File Header, which is variable size,
534     * so we have to read the contents of the struct to figure out where
535     * the actual data starts.
536     *
537     * We also need to make sure that the lengths are not so large that
538     * somebody trying to map the compressed or uncompressed data runs
539     * off the end of the mapped region.
540     *
541     * Note we don't verify compLen/uncompLen if they don't request the
542     * dataOffset, because dataOffset is expensive to determine.  However,
543     * if they don't have the file offset, they're not likely to be doing
544     * anything with the contents.
545     */
546    if (pOffset != NULL) {
547        long localHdrOffset = get4LE(ptr + kCDELocalOffset);
548        if (localHdrOffset + kLFHLen >= cdOffset) {
549            ALOGE("ERROR: bad local hdr offset in zip\n");
550            return false;
551        }
552
553        unsigned char lfhBuf[kLFHLen];
554
555#ifdef HAVE_PREAD
556        /*
557         * This file descriptor might be from zygote's preloaded assets,
558         * so we need to do an pread64() instead of a lseek64() + read() to
559         * guarantee atomicity across the processes with the shared file
560         * descriptors.
561         */
562        ssize_t actual =
563                TEMP_FAILURE_RETRY(pread64(mFd, lfhBuf, sizeof(lfhBuf), localHdrOffset));
564
565        if (actual != sizeof(lfhBuf)) {
566            ALOGW("failed reading lfh from offset %ld\n", localHdrOffset);
567            return false;
568        }
569
570        if (get4LE(lfhBuf) != kLFHSignature) {
571            ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; "
572                    "got: data=0x%08lx\n",
573                    localHdrOffset, kLFHSignature, get4LE(lfhBuf));
574            return false;
575        }
576#else /* HAVE_PREAD */
577        /*
578         * For hosts don't have pread64() we cannot guarantee atomic reads from
579         * an offset in a file. Android should never run on those platforms.
580         * File descriptors inherited from a fork() share file offsets and
581         * there would be nothing to protect from two different processes
582         * calling lseek64() concurrently.
583         */
584
585        {
586            AutoMutex _l(mFdLock);
587
588            if (lseek64(mFd, localHdrOffset, SEEK_SET) != localHdrOffset) {
589                ALOGW("failed seeking to lfh at offset %ld\n", localHdrOffset);
590                return false;
591            }
592
593            ssize_t actual =
594                    TEMP_FAILURE_RETRY(read(mFd, lfhBuf, sizeof(lfhBuf)));
595            if (actual != sizeof(lfhBuf)) {
596                ALOGW("failed reading lfh from offset %ld\n", localHdrOffset);
597                return false;
598            }
599
600            if (get4LE(lfhBuf) != kLFHSignature) {
601                off64_t actualOffset = lseek64(mFd, 0, SEEK_CUR);
602                ALOGW("didn't find signature at start of lfh; wanted: offset=%ld data=0x%08x; "
603                        "got: offset=" ZD " data=0x%08lx\n",
604                        localHdrOffset, kLFHSignature, (ZD_TYPE) actualOffset, get4LE(lfhBuf));
605                return false;
606            }
607        }
608#endif /* HAVE_PREAD */
609
610        off64_t dataOffset = localHdrOffset + kLFHLen
611            + get2LE(lfhBuf + kLFHNameLen) + get2LE(lfhBuf + kLFHExtraLen);
612        if (dataOffset >= cdOffset) {
613            ALOGW("bad data offset %ld in zip\n", (long) dataOffset);
614            return false;
615        }
616
617        /* check lengths */
618        if ((off64_t)(dataOffset + compLen) > cdOffset) {
619            ALOGW("bad compressed length in zip (%ld + " ZD " > %ld)\n",
620                (long) dataOffset, (ZD_TYPE) compLen, (long) cdOffset);
621            return false;
622        }
623
624        if (method == kCompressStored &&
625            (off64_t)(dataOffset + uncompLen) > cdOffset)
626        {
627            ALOGE("ERROR: bad uncompressed length in zip (%ld + " ZD " > %ld)\n",
628                (long) dataOffset, (ZD_TYPE) uncompLen, (long) cdOffset);
629            return false;
630        }
631
632        *pOffset = dataOffset;
633    }
634
635    return true;
636}
637
638/*
639 * Copy the entry's filename to the buffer.
640 */
641int ZipFileRO::getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen)
642    const
643{
644    int ent = entryToIndex(entry);
645    if (ent < 0)
646        return -1;
647
648    int nameLen = mHashTable[ent].nameLen;
649    if (bufLen < nameLen+1)
650        return nameLen+1;
651
652    memcpy(buffer, mHashTable[ent].name, nameLen);
653    buffer[nameLen] = '\0';
654    return 0;
655}
656
657/*
658 * Create a new FileMap object that spans the data in "entry".
659 */
660FileMap* ZipFileRO::createEntryFileMap(ZipEntryRO entry) const
661{
662    /*
663     * TODO: the efficient way to do this is to modify FileMap to allow
664     * sub-regions of a file to be mapped.  A reference-counting scheme
665     * can manage the base memory mapping.  For now, we just create a brand
666     * new mapping off of the Zip archive file descriptor.
667     */
668
669    FileMap* newMap;
670    size_t compLen;
671    off64_t offset;
672
673    if (!getEntryInfo(entry, NULL, NULL, &compLen, &offset, NULL, NULL))
674        return NULL;
675
676    newMap = new FileMap();
677    if (!newMap->create(mFileName, mFd, offset, compLen, true)) {
678        newMap->release();
679        return NULL;
680    }
681
682    return newMap;
683}
684
685/*
686 * Uncompress an entry, in its entirety, into the provided output buffer.
687 *
688 * This doesn't verify the data's CRC, which might be useful for
689 * uncompressed data.  The caller should be able to manage it.
690 */
691bool ZipFileRO::uncompressEntry(ZipEntryRO entry, void* buffer) const
692{
693    const size_t kSequentialMin = 32768;
694    bool result = false;
695    int ent = entryToIndex(entry);
696    if (ent < 0)
697        return -1;
698
699    int method;
700    size_t uncompLen, compLen;
701    off64_t offset;
702    const unsigned char* ptr;
703
704    getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL);
705
706    FileMap* file = createEntryFileMap(entry);
707    if (file == NULL) {
708        goto bail;
709    }
710
711    ptr = (const unsigned char*) file->getDataPtr();
712
713    /*
714     * Experiment with madvise hint.  When we want to uncompress a file,
715     * we pull some stuff out of the central dir entry and then hit a
716     * bunch of compressed or uncompressed data sequentially.  The CDE
717     * visit will cause a limited amount of read-ahead because it's at
718     * the end of the file.  We could end up doing lots of extra disk
719     * access if the file we're prying open is small.  Bottom line is we
720     * probably don't want to turn MADV_SEQUENTIAL on and leave it on.
721     *
722     * So, if the compressed size of the file is above a certain minimum
723     * size, temporarily boost the read-ahead in the hope that the extra
724     * pair of system calls are negated by a reduction in page faults.
725     */
726    if (compLen > kSequentialMin)
727        file->advise(FileMap::SEQUENTIAL);
728
729    if (method == kCompressStored) {
730        memcpy(buffer, ptr, uncompLen);
731    } else {
732        if (!inflateBuffer(buffer, ptr, uncompLen, compLen))
733            goto unmap;
734    }
735
736    if (compLen > kSequentialMin)
737        file->advise(FileMap::NORMAL);
738
739    result = true;
740
741unmap:
742    file->release();
743bail:
744    return result;
745}
746
747/*
748 * Uncompress an entry, in its entirety, to an open file descriptor.
749 *
750 * This doesn't verify the data's CRC, but probably should.
751 */
752bool ZipFileRO::uncompressEntry(ZipEntryRO entry, int fd) const
753{
754    bool result = false;
755    int ent = entryToIndex(entry);
756    if (ent < 0)
757        return -1;
758
759    int method;
760    size_t uncompLen, compLen;
761    off64_t offset;
762    const unsigned char* ptr;
763
764    getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL);
765
766    FileMap* file = createEntryFileMap(entry);
767    if (file == NULL) {
768        goto bail;
769    }
770
771    ptr = (const unsigned char*) file->getDataPtr();
772
773    if (method == kCompressStored) {
774        ssize_t actual = TEMP_FAILURE_RETRY(write(fd, ptr, uncompLen));
775        if (actual < 0) {
776            ALOGE("Write failed: %s\n", strerror(errno));
777            goto unmap;
778        } else if ((size_t) actual != uncompLen) {
779            ALOGE("Partial write during uncompress (" ZD " of " ZD ")\n",
780                (ZD_TYPE) actual, (ZD_TYPE) uncompLen);
781            goto unmap;
782        } else {
783            ALOGI("+++ successful write\n");
784        }
785    } else {
786        if (!inflateBuffer(fd, ptr, uncompLen, compLen))
787            goto unmap;
788    }
789
790    result = true;
791
792unmap:
793    file->release();
794bail:
795    return result;
796}
797
798/*
799 * Uncompress "deflate" data from one buffer to another.
800 */
801/*static*/ bool ZipFileRO::inflateBuffer(void* outBuf, const void* inBuf,
802    size_t uncompLen, size_t compLen)
803{
804    bool result = false;
805    z_stream zstream;
806    int zerr;
807
808    /*
809     * Initialize the zlib stream struct.
810     */
811    memset(&zstream, 0, sizeof(zstream));
812    zstream.zalloc = Z_NULL;
813    zstream.zfree = Z_NULL;
814    zstream.opaque = Z_NULL;
815    zstream.next_in = (Bytef*)inBuf;
816    zstream.avail_in = compLen;
817    zstream.next_out = (Bytef*) outBuf;
818    zstream.avail_out = uncompLen;
819    zstream.data_type = Z_UNKNOWN;
820
821    /*
822     * Use the undocumented "negative window bits" feature to tell zlib
823     * that there's no zlib header waiting for it.
824     */
825    zerr = inflateInit2(&zstream, -MAX_WBITS);
826    if (zerr != Z_OK) {
827        if (zerr == Z_VERSION_ERROR) {
828            ALOGE("Installed zlib is not compatible with linked version (%s)\n",
829                ZLIB_VERSION);
830        } else {
831            ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
832        }
833        goto bail;
834    }
835
836    /*
837     * Expand data.
838     */
839    zerr = inflate(&zstream, Z_FINISH);
840    if (zerr != Z_STREAM_END) {
841        ALOGW("Zip inflate failed, zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
842            zerr, zstream.next_in, zstream.avail_in,
843            zstream.next_out, zstream.avail_out);
844        goto z_bail;
845    }
846
847    /* paranoia */
848    if (zstream.total_out != uncompLen) {
849        ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n",
850            zstream.total_out, (ZD_TYPE) uncompLen);
851        goto z_bail;
852    }
853
854    result = true;
855
856z_bail:
857    inflateEnd(&zstream);        /* free up any allocated structures */
858
859bail:
860    return result;
861}
862
863/*
864 * Uncompress "deflate" data from one buffer to an open file descriptor.
865 */
866/*static*/ bool ZipFileRO::inflateBuffer(int fd, const void* inBuf,
867    size_t uncompLen, size_t compLen)
868{
869    bool result = false;
870    const size_t kWriteBufSize = 32768;
871    unsigned char writeBuf[kWriteBufSize];
872    z_stream zstream;
873    int zerr;
874
875    /*
876     * Initialize the zlib stream struct.
877     */
878    memset(&zstream, 0, sizeof(zstream));
879    zstream.zalloc = Z_NULL;
880    zstream.zfree = Z_NULL;
881    zstream.opaque = Z_NULL;
882    zstream.next_in = (Bytef*)inBuf;
883    zstream.avail_in = compLen;
884    zstream.next_out = (Bytef*) writeBuf;
885    zstream.avail_out = sizeof(writeBuf);
886    zstream.data_type = Z_UNKNOWN;
887
888    /*
889     * Use the undocumented "negative window bits" feature to tell zlib
890     * that there's no zlib header waiting for it.
891     */
892    zerr = inflateInit2(&zstream, -MAX_WBITS);
893    if (zerr != Z_OK) {
894        if (zerr == Z_VERSION_ERROR) {
895            ALOGE("Installed zlib is not compatible with linked version (%s)\n",
896                ZLIB_VERSION);
897        } else {
898            ALOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
899        }
900        goto bail;
901    }
902
903    /*
904     * Loop while we have more to do.
905     */
906    do {
907        /*
908         * Expand data.
909         */
910        zerr = inflate(&zstream, Z_NO_FLUSH);
911        if (zerr != Z_OK && zerr != Z_STREAM_END) {
912            ALOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n",
913                zerr, zstream.next_in, zstream.avail_in,
914                zstream.next_out, zstream.avail_out);
915            goto z_bail;
916        }
917
918        /* write when we're full or when we're done */
919        if (zstream.avail_out == 0 ||
920            (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf)))
921        {
922            long writeSize = zstream.next_out - writeBuf;
923            int cc = TEMP_FAILURE_RETRY(write(fd, writeBuf, writeSize));
924            if (cc < 0) {
925                ALOGW("write failed in inflate: %s", strerror(errno));
926                goto z_bail;
927            } else if (cc != (int) writeSize) {
928                ALOGW("write failed in inflate (%d vs %ld)", cc, writeSize);
929                goto z_bail;
930            }
931
932            zstream.next_out = writeBuf;
933            zstream.avail_out = sizeof(writeBuf);
934        }
935    } while (zerr == Z_OK);
936
937    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
938
939    /* paranoia */
940    if (zstream.total_out != uncompLen) {
941        ALOGW("Size mismatch on inflated file (%ld vs " ZD ")\n",
942            zstream.total_out, (ZD_TYPE) uncompLen);
943        goto z_bail;
944    }
945
946    result = true;
947
948z_bail:
949    inflateEnd(&zstream);        /* free up any allocated structures */
950
951bail:
952    return result;
953}
954