Zip.c revision 596271fa71d79e3eec03c7cf6ac76cb026dd8578
1/*
2 * Copyright 2006 The Android Open Source Project
3 *
4 * Simple Zip file support.
5 */
6#include "safe_iop.h"
7#include "zlib.h"
8
9#include <errno.h>
10#include <fcntl.h>
11#include <limits.h>
12#include <stdint.h>     // for uintptr_t
13#include <stdlib.h>
14#include <sys/stat.h>   // for S_ISLNK()
15#include <unistd.h>
16
17#define LOG_TAG "minzip"
18#include "Zip.h"
19#include "Bits.h"
20#include "Log.h"
21#include "DirUtil.h"
22
23#undef NDEBUG   // do this after including Log.h
24#include <assert.h>
25
26#define SORT_ENTRIES 1
27
/*
 * Signatures, fixed header sizes and field offsets for the Zip records
 * this file reads (names follow the java.util.zip convention).
 */
enum {
    /* Central directory entry. */
    CENSIG = 0x02014b50,    // PK12
    CENHDR = 46,            // size of the fixed part of an entry

    CENVEM = 4,             // "version made by"
    CENVER = 6,
    CENFLG = 8,
    CENHOW = 10,            // compression method
    CENTIM = 12,            // modification time
    CENCRC = 16,            // CRC-32
    CENSIZ = 20,            // compressed length
    CENLEN = 24,            // uncompressed length
    CENNAM = 28,            // filename length
    CENEXT = 30,            // extra field length
    CENCOM = 32,            // comment length
    CENDSK = 34,
    CENATT = 36,
    CENATX = 38,            // external file attributes
    CENOFF = 42,            // offset of the local header

    /* End-of-central-directory record. */
    ENDSIG = 0x06054b50,    // PK56
    ENDHDR = 22,

    ENDSUB = 8,             // read as the entry count
    ENDTOT = 10,
    ENDSIZ = 12,
    ENDOFF = 16,            // offset of the central directory
    ENDCOM = 20,

    /* Data descriptor. */
    EXTSIG = 0x08074b50,    // PK78
    EXTHDR = 16,

    EXTCRC = 4,
    EXTSIZ = 8,
    EXTLEN = 12,

    /* Local file header. */
    LOCSIG = 0x04034b50,    // PK34
    LOCHDR = 30,

    LOCVER = 4,
    LOCFLG = 6,
    LOCHOW = 8,
    LOCTIM = 10,
    LOCCRC = 14,
    LOCSIZ = 18,
    LOCLEN = 22,
    LOCNAM = 26,            // filename length
    LOCEXT = 28,            // extra field length

    /* Compression methods. */
    STORED = 0,
    DEFLATED = 8,

    /* Value of the high byte of CENVEM that this file treats as Unix. */
    CENVEM_UNIX = 0x0300,
};
85
86
/*
 * Debugging aid (currently compiled out): log the name, data offset,
 * lengths and compression method of one ZipEntry.
 */
#if 0
static void dumpEntry(const ZipEntry* pEntry)
{
    LOGI(" %p '%.*s'\n",
        pEntry->fileName, pEntry->fileNameLen, pEntry->fileName);
    LOGI("   off=%ld comp=%ld uncomp=%ld how=%d\n",
        pEntry->offset, pEntry->compLen,
        pEntry->uncompLen, pEntry->compression);
}
#endif
98
99/*
100 * (This is a mzHashTableLookup callback.)
101 *
102 * Compare two ZipEntry structs, by name.
103 */
104static int hashcmpZipEntry(const void* ventry1, const void* ventry2)
105{
106    const ZipEntry* entry1 = (const ZipEntry*) ventry1;
107    const ZipEntry* entry2 = (const ZipEntry*) ventry2;
108
109    if (entry1->fileNameLen != entry2->fileNameLen)
110        return entry1->fileNameLen - entry2->fileNameLen;
111    return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen);
112}
113
114/*
115 * (This is a mzHashTableLookup callback.)
116 *
117 * find a ZipEntry struct by name.
118 */
119static int hashcmpZipName(const void* ventry, const void* vname)
120{
121    const ZipEntry* entry = (const ZipEntry*) ventry;
122    const char* name = (const char*) vname;
123    unsigned int nameLen = strlen(name);
124
125    if (entry->fileNameLen != nameLen)
126        return entry->fileNameLen - nameLen;
127    return memcmp(entry->fileName, name, nameLen);
128}
129
/*
 * Compute the hash code for a ZipEntry filename: a multiply-by-31
 * accumulation over the first nameLen bytes of name.
 *
 * The accumulator is seeded with 2 so the result is not expected to
 * coincide with any other hash function.
 */
static unsigned int computeHash(const char* name, int nameLen)
{
    unsigned int accum = 2;
    const char* cursor = name;
    int remaining;

    for (remaining = nameLen; remaining != 0; remaining--) {
        accum = accum * 31 + *cursor;
        cursor++;
    }

    return accum;
}
145
146static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry)
147{
148    unsigned int itemHash = computeHash(pEntry->fileName, pEntry->fileNameLen);
149    const ZipEntry* found;
150
151    found = (const ZipEntry*)mzHashTableLookup(pHash,
152                itemHash, pEntry, hashcmpZipEntry, true);
153    if (found != pEntry) {
154        LOGW("WARNING: duplicate entry '%.*s' in Zip\n",
155            found->fileNameLen, found->fileName);
156        /* keep going */
157    }
158}
159
160static int validFilename(const char *fileName, unsigned int fileNameLen)
161{
162    // Forbid super long filenames.
163    if (fileNameLen >= PATH_MAX) {
164        LOGW("Filename too long (%d chatacters)\n", fileNameLen);
165        return 0;
166    }
167
168    // Require all characters to be printable ASCII (no NUL, no UTF-8, etc).
169    unsigned int i;
170    for (i = 0; i < fileNameLen; ++i) {
171        if (fileName[i] < 32 || fileName[i] >= 127) {
172            LOGW("Filename contains invalid character '\%03o'\n", fileName[i]);
173            return 0;
174        }
175    }
176
177    return 1;
178}
179
180/*
181 * Parse the contents of a Zip archive.  After confirming that the file
182 * is in fact a Zip, we scan out the contents of the central directory and
183 * store it in a hash table.
184 *
185 * Returns "true" on success.
186 */
187static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap)
188{
189    bool result = false;
190    const unsigned char* ptr;
191    unsigned int i, numEntries, cdOffset;
192    unsigned int val;
193
194    /*
195     * The first 4 bytes of the file will either be the local header
196     * signature for the first file (LOCSIG) or, if the archive doesn't
197     * have any files in it, the end-of-central-directory signature (ENDSIG).
198     */
199    val = get4LE(pMap->addr);
200    if (val == ENDSIG) {
201        LOGI("Found Zip archive, but it looks empty\n");
202        goto bail;
203    } else if (val != LOCSIG) {
204        LOGV("Not a Zip archive (found 0x%08x)\n", val);
205        goto bail;
206    }
207
208    /*
209     * Find the EOCD.  We'll find it immediately unless they have a file
210     * comment.
211     */
212    ptr = pMap->addr + pMap->length - ENDHDR;
213
214    while (ptr >= (const unsigned char*) pMap->addr) {
215        if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG)
216            break;
217        ptr--;
218    }
219    if (ptr < (const unsigned char*) pMap->addr) {
220        LOGI("Could not find end-of-central-directory in Zip\n");
221        goto bail;
222    }
223
224    /*
225     * There are two interesting items in the EOCD block: the number of
226     * entries in the file, and the file offset of the start of the
227     * central directory.
228     */
229    numEntries = get2LE(ptr + ENDSUB);
230    cdOffset = get4LE(ptr + ENDOFF);
231
232    LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset);
233    if (numEntries == 0 || cdOffset >= pMap->length) {
234        LOGW("Invalid entries=%d offset=%d (len=%zd)\n",
235            numEntries, cdOffset, pMap->length);
236        goto bail;
237    }
238
239    /*
240     * Create data structures to hold entries.
241     */
242    pArchive->numEntries = numEntries;
243    pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry));
244    pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL);
245    if (pArchive->pEntries == NULL || pArchive->pHash == NULL)
246        goto bail;
247
248    ptr = pMap->addr + cdOffset;
249    for (i = 0; i < numEntries; i++) {
250        ZipEntry* pEntry;
251        unsigned int fileNameLen, extraLen, commentLen, localHdrOffset;
252        const unsigned char* localHdr;
253        const char *fileName;
254
255        if (ptr + CENHDR > (const unsigned char*)pMap->addr + pMap->length) {
256            LOGW("Ran off the end (at %d)\n", i);
257            goto bail;
258        }
259        if (get4LE(ptr) != CENSIG) {
260            LOGW("Missed a central dir sig (at %d)\n", i);
261            goto bail;
262        }
263
264        localHdrOffset = get4LE(ptr + CENOFF);
265        fileNameLen = get2LE(ptr + CENNAM);
266        extraLen = get2LE(ptr + CENEXT);
267        commentLen = get2LE(ptr + CENCOM);
268        fileName = (const char*)ptr + CENHDR;
269        if (fileName + fileNameLen > (const char*)pMap->addr + pMap->length) {
270            LOGW("Filename ran off the end (at %d)\n", i);
271            goto bail;
272        }
273        if (!validFilename(fileName, fileNameLen)) {
274            LOGW("Invalid filename (at %d)\n", i);
275            goto bail;
276        }
277
278#if SORT_ENTRIES
279        /* Figure out where this entry should go (binary search).
280         */
281        if (i > 0) {
282            int low, high;
283
284            low = 0;
285            high = i - 1;
286            while (low <= high) {
287                int mid;
288                int diff;
289                int diffLen;
290
291                mid = low + ((high - low) / 2); // avoid overflow
292
293                if (pArchive->pEntries[mid].fileNameLen < fileNameLen) {
294                    diffLen = pArchive->pEntries[mid].fileNameLen;
295                } else {
296                    diffLen = fileNameLen;
297                }
298                diff = strncmp(pArchive->pEntries[mid].fileName, fileName,
299                        diffLen);
300                if (diff == 0) {
301                    diff = pArchive->pEntries[mid].fileNameLen - fileNameLen;
302                }
303                if (diff < 0) {
304                    low = mid + 1;
305                } else if (diff > 0) {
306                    high = mid - 1;
307                } else {
308                    high = mid;
309                    break;
310                }
311            }
312
313            unsigned int target = high + 1;
314            assert(target <= i);
315            if (target != i) {
316                /* It belongs somewhere other than at the end of
317                 * the list.  Make some room at [target].
318                 */
319                memmove(pArchive->pEntries + target + 1,
320                        pArchive->pEntries + target,
321                        (i - target) * sizeof(ZipEntry));
322            }
323            pEntry = &pArchive->pEntries[target];
324        } else {
325            pEntry = &pArchive->pEntries[0];
326        }
327#else
328        pEntry = &pArchive->pEntries[i];
329#endif
330
331        //LOGI("%d: localHdr=%d fnl=%d el=%d cl=%d\n",
332        //    i, localHdrOffset, fileNameLen, extraLen, commentLen);
333
334        pEntry->fileNameLen = fileNameLen;
335        pEntry->fileName = fileName;
336
337        pEntry->compLen = get4LE(ptr + CENSIZ);
338        pEntry->uncompLen = get4LE(ptr + CENLEN);
339        pEntry->compression = get2LE(ptr + CENHOW);
340        pEntry->modTime = get4LE(ptr + CENTIM);
341        pEntry->crc32 = get4LE(ptr + CENCRC);
342
343        /* These two are necessary for finding the mode of the file.
344         */
345        pEntry->versionMadeBy = get2LE(ptr + CENVEM);
346        if ((pEntry->versionMadeBy & 0xff00) != 0 &&
347                (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX)
348        {
349            LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n",
350                    pEntry->versionMadeBy >> 8, i);
351            goto bail;
352        }
353        pEntry->externalFileAttributes = get4LE(ptr + CENATX);
354
355        // Perform pMap->addr + localHdrOffset, ensuring that it won't
356        // overflow. This is needed because localHdrOffset is untrusted.
357        if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pMap->addr,
358            (uintptr_t)localHdrOffset)) {
359            LOGW("Integer overflow adding in parseZipArchive\n");
360            goto bail;
361        }
362        if ((uintptr_t)localHdr + LOCHDR >
363            (uintptr_t)pMap->addr + pMap->length) {
364            LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i);
365            goto bail;
366        }
367        if (get4LE(localHdr) != LOCSIG) {
368            LOGW("Missed a local header sig (at %d)\n", i);
369            goto bail;
370        }
371        pEntry->offset = localHdrOffset + LOCHDR
372            + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT);
373        if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) {
374            LOGW("Integer overflow adding in parseZipArchive\n");
375            goto bail;
376        }
377        if ((size_t)pEntry->offset + pEntry->compLen > pMap->length) {
378            LOGW("Data ran off the end (at %d)\n", i);
379            goto bail;
380        }
381
382#if !SORT_ENTRIES
383        /* Add to hash table; no need to lock here.
384         * Can't do this now if we're sorting, because entries
385         * will move around.
386         */
387        addEntryToHashTable(pArchive->pHash, pEntry);
388#endif
389
390        //dumpEntry(pEntry);
391        ptr += CENHDR + fileNameLen + extraLen + commentLen;
392    }
393
394#if SORT_ENTRIES
395    /* If we're sorting, we have to wait until all entries
396     * are in their final places, otherwise the pointers will
397     * probably point to the wrong things.
398     */
399    for (i = 0; i < numEntries; i++) {
400        /* Add to hash table; no need to lock here.
401         */
402        addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]);
403    }
404#endif
405
406    result = true;
407
408bail:
409    if (!result) {
410        mzHashTableFree(pArchive->pHash);
411        pArchive->pHash = NULL;
412    }
413    return result;
414}
415
416/*
417 * Open a Zip archive and scan out the contents.
418 *
419 * The easiest way to do this is to mmap() the whole thing and do the
420 * traditional backward scan for central directory.  Since the EOCD is
421 * a relatively small bit at the end, we should end up only touching a
422 * small set of pages.
423 *
424 * This will be called on non-Zip files, especially during startup, so
425 * we don't want to be too noisy about failures.  (Do we want a "quiet"
426 * flag?)
427 *
428 * On success, we fill out the contents of "pArchive".
429 */
430int mzOpenZipArchive(const char* fileName, ZipArchive* pArchive)
431{
432    MemMapping map;
433    int err;
434
435    LOGV("Opening archive '%s' %p\n", fileName, pArchive);
436
437    map.addr = NULL;
438    memset(pArchive, 0, sizeof(*pArchive));
439
440    pArchive->fd = open(fileName, O_RDONLY, 0);
441    if (pArchive->fd < 0) {
442        err = errno ? errno : -1;
443        LOGV("Unable to open '%s': %s\n", fileName, strerror(err));
444        goto bail;
445    }
446
447    if (sysMapFileInShmem(pArchive->fd, &map) != 0) {
448        err = -1;
449        LOGW("Map of '%s' failed\n", fileName);
450        goto bail;
451    }
452
453    if (map.length < ENDHDR) {
454        err = -1;
455        LOGV("File '%s' too small to be zip (%zd)\n", fileName, map.length);
456        goto bail;
457    }
458
459    if (!parseZipArchive(pArchive, &map)) {
460        err = -1;
461        LOGV("Parsing '%s' failed\n", fileName);
462        goto bail;
463    }
464
465    err = 0;
466    sysCopyMap(&pArchive->map, &map);
467    map.addr = NULL;
468
469bail:
470    if (err != 0)
471        mzCloseZipArchive(pArchive);
472    if (map.addr != NULL)
473        sysReleaseShmem(&map);
474    return err;
475}
476
477/*
478 * Close a ZipArchive, closing the file and freeing the contents.
479 *
480 * NOTE: the ZipArchive may not have been fully created.
481 */
482void mzCloseZipArchive(ZipArchive* pArchive)
483{
484    LOGV("Closing archive %p\n", pArchive);
485
486    if (pArchive->fd >= 0)
487        close(pArchive->fd);
488    if (pArchive->map.addr != NULL)
489        sysReleaseShmem(&pArchive->map);
490
491    free(pArchive->pEntries);
492
493    mzHashTableFree(pArchive->pHash);
494
495    pArchive->fd = -1;
496    pArchive->pHash = NULL;
497    pArchive->pEntries = NULL;
498}
499
500/*
501 * Find a matching entry.
502 *
503 * Returns NULL if no matching entry found.
504 */
505const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive,
506        const char* entryName)
507{
508    unsigned int itemHash = computeHash(entryName, strlen(entryName));
509
510    return (const ZipEntry*)mzHashTableLookup(pArchive->pHash,
511                itemHash, (char*) entryName, hashcmpZipName, false);
512}
513
514/*
515 * Return true if the entry is a symbolic link.
516 */
517bool mzIsZipEntrySymlink(const ZipEntry* pEntry)
518{
519    if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) {
520        return S_ISLNK(pEntry->externalFileAttributes >> 16);
521    }
522    return false;
523}
524
525/* Call processFunction on the uncompressed data of a STORED entry.
526 */
527static bool processStoredEntry(const ZipArchive *pArchive,
528    const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
529    void *cookie)
530{
531    size_t bytesLeft = pEntry->compLen;
532    while (bytesLeft > 0) {
533        unsigned char buf[32 * 1024];
534        ssize_t n;
535        size_t count;
536        bool ret;
537
538        count = bytesLeft;
539        if (count > sizeof(buf)) {
540            count = sizeof(buf);
541        }
542        n = read(pArchive->fd, buf, count);
543        if (n < 0 || (size_t)n != count) {
544            LOGE("Can't read %zu bytes from zip file: %ld\n", count, n);
545            return false;
546        }
547        ret = processFunction(buf, n, cookie);
548        if (!ret) {
549            return false;
550        }
551        bytesLeft -= count;
552    }
553    return true;
554}
555
556static bool processDeflatedEntry(const ZipArchive *pArchive,
557    const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
558    void *cookie)
559{
560    long result = -1;
561    unsigned char readBuf[32 * 1024];
562    unsigned char procBuf[32 * 1024];
563    z_stream zstream;
564    int zerr;
565    long compRemaining;
566
567    compRemaining = pEntry->compLen;
568
569    /*
570     * Initialize the zlib stream.
571     */
572    memset(&zstream, 0, sizeof(zstream));
573    zstream.zalloc = Z_NULL;
574    zstream.zfree = Z_NULL;
575    zstream.opaque = Z_NULL;
576    zstream.next_in = NULL;
577    zstream.avail_in = 0;
578    zstream.next_out = (Bytef*) procBuf;
579    zstream.avail_out = sizeof(procBuf);
580    zstream.data_type = Z_UNKNOWN;
581
582    /*
583     * Use the undocumented "negative window bits" feature to tell zlib
584     * that there's no zlib header waiting for it.
585     */
586    zerr = inflateInit2(&zstream, -MAX_WBITS);
587    if (zerr != Z_OK) {
588        if (zerr == Z_VERSION_ERROR) {
589            LOGE("Installed zlib is not compatible with linked version (%s)\n",
590                ZLIB_VERSION);
591        } else {
592            LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr);
593        }
594        goto bail;
595    }
596
597    /*
598     * Loop while we have data.
599     */
600    do {
601        /* read as much as we can */
602        if (zstream.avail_in == 0) {
603            long getSize = (compRemaining > (long)sizeof(readBuf)) ?
604                        (long)sizeof(readBuf) : compRemaining;
605            LOGVV("+++ reading %ld bytes (%ld left)\n",
606                getSize, compRemaining);
607
608            int cc = read(pArchive->fd, readBuf, getSize);
609            if (cc != (int) getSize) {
610                LOGW("inflate read failed (%d vs %ld)\n", cc, getSize);
611                goto z_bail;
612            }
613
614            compRemaining -= getSize;
615
616            zstream.next_in = readBuf;
617            zstream.avail_in = getSize;
618        }
619
620        /* uncompress the data */
621        zerr = inflate(&zstream, Z_NO_FLUSH);
622        if (zerr != Z_OK && zerr != Z_STREAM_END) {
623            LOGD("zlib inflate call failed (zerr=%d)\n", zerr);
624            goto z_bail;
625        }
626
627        /* write when we're full or when we're done */
628        if (zstream.avail_out == 0 ||
629            (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf)))
630        {
631            long procSize = zstream.next_out - procBuf;
632            LOGVV("+++ processing %d bytes\n", (int) procSize);
633            bool ret = processFunction(procBuf, procSize, cookie);
634            if (!ret) {
635                LOGW("Process function elected to fail (in inflate)\n");
636                goto z_bail;
637            }
638
639            zstream.next_out = procBuf;
640            zstream.avail_out = sizeof(procBuf);
641        }
642    } while (zerr == Z_OK);
643
644    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
645
646    // success!
647    result = zstream.total_out;
648
649z_bail:
650    inflateEnd(&zstream);        /* free up any allocated structures */
651
652bail:
653    if (result != pEntry->uncompLen) {
654        if (result != -1)        // error already shown?
655            LOGW("Size mismatch on inflated file (%ld vs %ld)\n",
656                result, pEntry->uncompLen);
657        return false;
658    }
659    return true;
660}
661
662/*
663 * Stream the uncompressed data through the supplied function,
664 * passing cookie to it each time it gets called.  processFunction
665 * may be called more than once.
666 *
667 * If processFunction returns false, the operation is abandoned and
668 * mzProcessZipEntryContents() immediately returns false.
669 *
670 * This is useful for calculating the hash of an entry's uncompressed contents.
671 */
672bool mzProcessZipEntryContents(const ZipArchive *pArchive,
673    const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction,
674    void *cookie)
675{
676    bool ret = false;
677    off_t oldOff;
678
679    /* save current offset */
680    oldOff = lseek(pArchive->fd, 0, SEEK_CUR);
681
682    /* Seek to the beginning of the entry's compressed data. */
683    lseek(pArchive->fd, pEntry->offset, SEEK_SET);
684
685    switch (pEntry->compression) {
686    case STORED:
687        ret = processStoredEntry(pArchive, pEntry, processFunction, cookie);
688        break;
689    case DEFLATED:
690        ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie);
691        break;
692    default:
693        LOGE("Unsupported compression type %d for entry '%s'\n",
694                pEntry->compression, pEntry->fileName);
695        break;
696    }
697
698    /* restore file offset */
699    lseek(pArchive->fd, oldOff, SEEK_SET);
700    return ret;
701}
702
/*
 * (This is a mzProcessZipEntryContents callback.)
 *
 * Fold one chunk of data into the running CRC-32 that "crc" points to
 * (an unsigned long).  Always returns true.
 */
static bool crcProcessFunction(const unsigned char *data, int dataLen,
        void *crc)
{
    unsigned long *runningCrc = (unsigned long *)crc;

    *runningCrc = crc32(*runningCrc, data, dataLen);
    return true;
}
709
710/*
711 * Check the CRC on this entry; return true if it is correct.
712 * May do other internal checks as well.
713 */
714bool mzIsZipEntryIntact(const ZipArchive *pArchive, const ZipEntry *pEntry)
715{
716    unsigned long crc;
717    bool ret;
718
719    crc = crc32(0L, Z_NULL, 0);
720    ret = mzProcessZipEntryContents(pArchive, pEntry, crcProcessFunction,
721            (void *)&crc);
722    if (!ret) {
723        LOGE("Can't calculate CRC for entry\n");
724        return false;
725    }
726    if (crc != (unsigned long)pEntry->crc32) {
727        LOGW("CRC for entry %.*s (0x%08lx) != expected (0x%08lx)\n",
728                pEntry->fileNameLen, pEntry->fileName, crc, pEntry->crc32);
729        return false;
730    }
731    return true;
732}
733
/*
 * Cursor into a caller-supplied output buffer, used as the cookie for
 * copyProcessFunction.
 */
typedef struct {
    char *buf;      // where the next chunk will be written
    int bufLen;     // bytes still available at buf
} CopyProcessArgs;

/*
 * (This is a mzProcessZipEntryContents callback.)
 *
 * Append one chunk to the buffer described by "cookie" and advance the
 * cursor.  Returns false, leaving the cursor untouched, when the chunk
 * does not fit in the remaining space.
 */
static bool copyProcessFunction(const unsigned char *data, int dataLen,
        void *cookie)
{
    CopyProcessArgs *dest = cookie;

    if (dataLen > dest->bufLen) {
        return false;
    }

    memcpy(dest->buf, data, dataLen);
    dest->buf += dataLen;
    dest->bufLen -= dataLen;
    return true;
}
751
752/*
753 * Read an entry into a buffer allocated by the caller.
754 */
755bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry,
756        char *buf, int bufLen)
757{
758    CopyProcessArgs args;
759    bool ret;
760
761    args.buf = buf;
762    args.bufLen = bufLen;
763    ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction,
764            (void *)&args);
765    if (!ret) {
766        LOGE("Can't extract entry to buffer.\n");
767        return false;
768    }
769    return true;
770}
771
/*
 * (This is a mzProcessZipEntryContents callback.)
 *
 * Write one chunk of data to a file descriptor.  The descriptor is not
 * pointed to by "fd"; its integer value is carried in the pointer itself.
 *
 * Partial writes are continued until the whole chunk is out.  A write
 * interrupted by a signal (EINTR) is retried.  Five consecutive
 * zero-length writes, or any other write error, fail the call.
 *
 * Returns true once all dataLen bytes have been written.
 */
static bool writeProcessFunction(const unsigned char *data, int dataLen,
        void *fd)
{
    /* Go through intptr_t: converting a pointer straight to int changes
     * size where pointers are wider than int. */
    const int outFd = (int)(intptr_t)fd;
    int zeroWrites = 0;
    ssize_t soFar = 0;
    do {
        ssize_t n = write(outFd, data+soFar, dataLen-soFar);
        if (n < 0) {
            if (errno == EINTR) {
                continue;       /* interrupted before writing; try again */
            }
            LOGE("Error writing %zd bytes from zip file: %s\n",
                 (ssize_t)(dataLen-soFar), strerror(errno));
            return false;
        } else if (n > 0) {
            soFar += n;
            if (soFar == dataLen) return true;
            if (soFar > dataLen) {
                LOGE("write overrun?  (%zd bytes instead of %d)\n",
                     soFar, dataLen);
                return false;
            }
            zeroWrites = 0;
        } else {
            ++zeroWrites;
        }
    } while (zeroWrites < 5);
    LOGE("too many consecutive zero-length writes\n");
    return false;
}
799
800/*
801 * Uncompress "pEntry" in "pArchive" to "fd" at the current offset.
802 */
803bool mzExtractZipEntryToFile(const ZipArchive *pArchive,
804    const ZipEntry *pEntry, int fd)
805{
806    bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction,
807            (void *)fd);
808    if (!ret) {
809        LOGE("Can't extract entry to file.\n");
810        return false;
811    }
812    return true;
813}
814
/* Scratch state shared across targetEntryPath() calls, so one growable
 * buffer can be reused for every path instead of allocating each time.
 */
typedef struct {
    const char *targetDir;  // destination directory copied to the front of buf
    const char *zipDir;     // archive directory prefix being extracted
    char *buf;              // assembled path; NULL until first use
    int targetDirLen;       // length of the targetDir prefix held in buf
    int zipDirLen;          // leading bytes of each entry name to drop
    int bufLen;             // allocated size of buf
} MzPathHelper;
825
826/* Given the values of targetDir and zipDir in the helper,
827 * return the target filename of the provided entry.
828 * The helper must be initialized first.
829 */
830static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry)
831{
832    int needLen;
833    bool firstTime = (helper->buf == NULL);
834
835    /* target file <-- targetDir + / + entry[zipDirLen:]
836     */
837    needLen = helper->targetDirLen + 1 +
838            pEntry->fileNameLen - helper->zipDirLen + 1;
839    if (needLen > helper->bufLen) {
840        char *newBuf;
841
842        needLen *= 2;
843        newBuf = (char *)realloc(helper->buf, needLen);
844        if (newBuf == NULL) {
845            return NULL;
846        }
847        helper->buf = newBuf;
848        helper->bufLen = needLen;
849    }
850
851    /* Every path will start with the target path and a slash.
852     */
853    if (firstTime) {
854        char *p = helper->buf;
855        memcpy(p, helper->targetDir, helper->targetDirLen);
856        p += helper->targetDirLen;
857        if (p == helper->buf || p[-1] != '/') {
858            helper->targetDirLen += 1;
859            *p++ = '/';
860        }
861    }
862
863    /* Replace the custom part of the path with the appropriate
864     * part of the entry's path.
865     */
866    char *epath = helper->buf + helper->targetDirLen;
867    memcpy(epath, pEntry->fileName + helper->zipDirLen,
868            pEntry->fileNameLen - helper->zipDirLen);
869    epath += pEntry->fileNameLen - helper->zipDirLen;
870    *epath = '\0';
871
872    return helper->buf;
873}
874
875/*
876 * Inflate all entries under zipDir to the directory specified by
877 * targetDir, which must exist and be a writable directory.
878 *
879 * The immediate children of zipDir will become the immediate
880 * children of targetDir; e.g., if the archive contains the entries
881 *
882 *     a/b/c/one
883 *     a/b/c/two
884 *     a/b/c/d/three
885 *
886 * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting
887 * files will be
888 *
889 *     /tmp/one
890 *     /tmp/two
891 *     /tmp/d/three
892 *
893 * Returns true on success, false on failure.
894 */
895bool mzExtractRecursive(const ZipArchive *pArchive,
896                        const char *zipDir, const char *targetDir,
897                        int flags, const struct utimbuf *timestamp,
898                        void (*callback)(const char *fn, void *), void *cookie)
899{
900    if (zipDir[0] == '/') {
901        LOGE("mzExtractRecursive(): zipDir must be a relative path.\n");
902        return false;
903    }
904    if (targetDir[0] != '/') {
905        LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n");
906        return false;
907    }
908
909    unsigned int zipDirLen;
910    char *zpath;
911
912    zipDirLen = strlen(zipDir);
913    zpath = (char *)malloc(zipDirLen + 2);
914    if (zpath == NULL) {
915        LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2);
916        return false;
917    }
918    /* If zipDir is empty, we'll extract the entire zip file.
919     * Otherwise, canonicalize the path.
920     */
921    if (zipDirLen > 0) {
922        /* Make sure there's (hopefully, exactly one) slash at the
923         * end of the path.  This way we don't need to worry about
924         * accidentally extracting "one/twothree" when a path like
925         * "one/two" is specified.
926         */
927        memcpy(zpath, zipDir, zipDirLen);
928        if (zpath[zipDirLen-1] != '/') {
929            zpath[zipDirLen++] = '/';
930        }
931    }
932    zpath[zipDirLen] = '\0';
933
934    /* Set up the helper structure that we'll use to assemble paths.
935     */
936    MzPathHelper helper;
937    helper.targetDir = targetDir;
938    helper.targetDirLen = strlen(helper.targetDir);
939    helper.zipDir = zpath;
940    helper.zipDirLen = strlen(helper.zipDir);
941    helper.buf = NULL;
942    helper.bufLen = 0;
943
944    /* Walk through the entries and extract anything whose path begins
945     * with zpath.
946//TODO: since the entries are sorted, binary search for the first match
947//      and stop after the first non-match.
948     */
949    unsigned int i;
950    bool seenMatch = false;
951    int ok = true;
952    for (i = 0; i < pArchive->numEntries; i++) {
953        ZipEntry *pEntry = pArchive->pEntries + i;
954        if (pEntry->fileNameLen < zipDirLen) {
955//TODO: look out for a single empty directory entry that matches zpath, but
956//      missing the trailing slash.  Most zip files seem to include
957//      the trailing slash, but I think it's legal to leave it off.
958//      e.g., zpath "a/b/", entry "a/b", with no children of the entry.
959            /* No chance of matching.
960             */
961#if SORT_ENTRIES
962            if (seenMatch) {
963                /* Since the entries are sorted, we can give up
964                 * on the first mismatch after the first match.
965                 */
966                break;
967            }
968#endif
969            continue;
970        }
971        /* If zpath is empty, this strncmp() will match everything,
972         * which is what we want.
973         */
974        if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) {
975#if SORT_ENTRIES
976            if (seenMatch) {
977                /* Since the entries are sorted, we can give up
978                 * on the first mismatch after the first match.
979                 */
980                break;
981            }
982#endif
983            continue;
984        }
985        /* This entry begins with zipDir, so we'll extract it.
986         */
987        seenMatch = true;
988
989        /* Find the target location of the entry.
990         */
991        const char *targetFile = targetEntryPath(&helper, pEntry);
992        if (targetFile == NULL) {
993            LOGE("Can't assemble target path for \"%.*s\"\n",
994                    pEntry->fileNameLen, pEntry->fileName);
995            ok = false;
996            break;
997        }
998
999        /* With DRY_RUN set, invoke the callback but don't do anything else.
1000         */
1001        if (flags & MZ_EXTRACT_DRY_RUN) {
1002            if (callback != NULL) callback(targetFile, cookie);
1003            continue;
1004        }
1005
1006        /* Create the file or directory.
1007         */
1008#define UNZIP_DIRMODE 0755
1009#define UNZIP_FILEMODE 0644
1010        if (pEntry->fileName[pEntry->fileNameLen-1] == '/') {
1011            if (!(flags & MZ_EXTRACT_FILES_ONLY)) {
1012                int ret = dirCreateHierarchy(
1013                        targetFile, UNZIP_DIRMODE, timestamp, false);
1014                if (ret != 0) {
1015                    LOGE("Can't create containing directory for \"%s\": %s\n",
1016                            targetFile, strerror(errno));
1017                    ok = false;
1018                    break;
1019                }
1020                LOGD("Extracted dir \"%s\"\n", targetFile);
1021            }
1022        } else {
1023            /* This is not a directory.  First, make sure that
1024             * the containing directory exists.
1025             */
1026            int ret = dirCreateHierarchy(
1027                    targetFile, UNZIP_DIRMODE, timestamp, true);
1028            if (ret != 0) {
1029                LOGE("Can't create containing directory for \"%s\": %s\n",
1030                        targetFile, strerror(errno));
1031                ok = false;
1032                break;
1033            }
1034
1035            /* With FILES_ONLY set, we need to ignore metadata entirely,
1036             * so treat symlinks as regular files.
1037             */
1038            if (!(flags & MZ_EXTRACT_FILES_ONLY) && mzIsZipEntrySymlink(pEntry)) {
1039                /* The entry is a symbolic link.
1040                 * The relative target of the symlink is in the
1041                 * data section of this entry.
1042                 */
1043                if (pEntry->uncompLen == 0) {
1044                    LOGE("Symlink entry \"%s\" has no target\n",
1045                            targetFile);
1046                    ok = false;
1047                    break;
1048                }
1049                char *linkTarget = malloc(pEntry->uncompLen + 1);
1050                if (linkTarget == NULL) {
1051                    ok = false;
1052                    break;
1053                }
1054                ok = mzReadZipEntry(pArchive, pEntry, linkTarget,
1055                        pEntry->uncompLen);
1056                if (!ok) {
1057                    LOGE("Can't read symlink target for \"%s\"\n",
1058                            targetFile);
1059                    free(linkTarget);
1060                    break;
1061                }
1062                linkTarget[pEntry->uncompLen] = '\0';
1063
1064                /* Make the link.
1065                 */
1066                ret = symlink(linkTarget, targetFile);
1067                if (ret != 0) {
1068                    LOGE("Can't symlink \"%s\" to \"%s\": %s\n",
1069                            targetFile, linkTarget, strerror(errno));
1070                    free(linkTarget);
1071                    ok = false;
1072                    break;
1073                }
1074                LOGD("Extracted symlink \"%s\" -> \"%s\"\n",
1075                        targetFile, linkTarget);
1076                free(linkTarget);
1077            } else {
1078                /* The entry is a regular file.
1079                 * Open the target for writing.
1080                 */
1081                int fd = creat(targetFile, UNZIP_FILEMODE);
1082                if (fd < 0) {
1083                    LOGE("Can't create target file \"%s\": %s\n",
1084                            targetFile, strerror(errno));
1085                    ok = false;
1086                    break;
1087                }
1088
1089                bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd);
1090                close(fd);
1091                if (!ok) {
1092                    LOGE("Error extracting \"%s\"\n", targetFile);
1093                    ok = false;
1094                    break;
1095                }
1096
1097                if (timestamp != NULL && utime(targetFile, timestamp)) {
1098                    LOGE("Error touching \"%s\"\n", targetFile);
1099                    ok = false;
1100                    break;
1101                }
1102
1103                LOGD("Extracted file \"%s\"\n", targetFile);
1104            }
1105        }
1106
1107        if (callback != NULL) callback(targetFile, cookie);
1108    }
1109
1110    free(helper.buf);
1111    free(zpath);
1112
1113    return ok;
1114}
1115