ZipFile.cpp revision a982dc05d7ca919c07f50e446549ef9dceadf6bd
1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//
18// Access to Zip archives.
19//
20
21#define LOG_TAG "zip"
22
23#include <utils/Log.h>
24#include <utils/ZipUtils.h>
25
26#include "ZipFile.h"
27
28#include <zlib.h>
29#define DEF_MEM_LEVEL 8                // normally in zutil.h?
30
31#include <memory.h>
32#include <sys/stat.h>
33#include <errno.h>
34#include <assert.h>
35
36using namespace android;
37
38/*
39 * Some environments require the "b", some choke on it.
40 */
41#define FILE_OPEN_RO        "rb"
42#define FILE_OPEN_RW        "r+b"
43#define FILE_OPEN_RW_CREATE "w+b"
44
45/* should live somewhere else? */
46static status_t errnoToStatus(int err)
47{
48    if (err == ENOENT)
49        return NAME_NOT_FOUND;
50    else if (err == EACCES)
51        return PERMISSION_DENIED;
52    else
53        return UNKNOWN_ERROR;
54}
55
56/*
57 * Open a file and parse its guts.
58 */
59status_t ZipFile::open(const char* zipFileName, int flags)
60{
61    bool newArchive = false;
62
63    assert(mZipFp == NULL);     // no reopen
64
65    if ((flags & kOpenTruncate))
66        flags |= kOpenCreate;           // trunc implies create
67
68    if ((flags & kOpenReadOnly) && (flags & kOpenReadWrite))
69        return INVALID_OPERATION;       // not both
70    if (!((flags & kOpenReadOnly) || (flags & kOpenReadWrite)))
71        return INVALID_OPERATION;       // not neither
72    if ((flags & kOpenCreate) && !(flags & kOpenReadWrite))
73        return INVALID_OPERATION;       // create requires write
74
75    if (flags & kOpenTruncate) {
76        newArchive = true;
77    } else {
78        newArchive = (access(zipFileName, F_OK) != 0);
79        if (!(flags & kOpenCreate) && newArchive) {
80            /* not creating, must already exist */
81            ALOGD("File %s does not exist", zipFileName);
82            return NAME_NOT_FOUND;
83        }
84    }
85
86    /* open the file */
87    const char* openflags;
88    if (flags & kOpenReadWrite) {
89        if (newArchive)
90            openflags = FILE_OPEN_RW_CREATE;
91        else
92            openflags = FILE_OPEN_RW;
93    } else {
94        openflags = FILE_OPEN_RO;
95    }
96    mZipFp = fopen(zipFileName, openflags);
97    if (mZipFp == NULL) {
98        int err = errno;
99        ALOGD("fopen failed: %d\n", err);
100        return errnoToStatus(err);
101    }
102
103    status_t result;
104    if (!newArchive) {
105        /*
106         * Load the central directory.  If that fails, then this probably
107         * isn't a Zip archive.
108         */
109        result = readCentralDir();
110    } else {
111        /*
112         * Newly-created.  The EndOfCentralDir constructor actually
113         * sets everything to be the way we want it (all zeroes).  We
114         * set mNeedCDRewrite so that we create *something* if the
115         * caller doesn't add any files.  (We could also just unlink
116         * the file if it's brand new and nothing was added, but that's
117         * probably doing more than we really should -- the user might
118         * have a need for empty zip files.)
119         */
120        mNeedCDRewrite = true;
121        result = NO_ERROR;
122    }
123
124    if (flags & kOpenReadOnly)
125        mReadOnly = true;
126    else
127        assert(!mReadOnly);
128
129    return result;
130}
131
132/*
133 * Return the Nth entry in the archive.
134 */
135ZipEntry* ZipFile::getEntryByIndex(int idx) const
136{
137    if (idx < 0 || idx >= (int) mEntries.size())
138        return NULL;
139
140    return mEntries[idx];
141}
142
143/*
144 * Find an entry by name.
145 */
146ZipEntry* ZipFile::getEntryByName(const char* fileName) const
147{
148    /*
149     * Do a stupid linear string-compare search.
150     *
151     * There are various ways to speed this up, especially since it's rare
152     * to intermingle changes to the archive with "get by name" calls.  We
153     * don't want to sort the mEntries vector itself, however, because
154     * it's used to recreate the Central Directory.
155     *
156     * (Hash table works, parallel list of pointers in sorted order is good.)
157     */
158    int idx;
159
160    for (idx = mEntries.size()-1; idx >= 0; idx--) {
161        ZipEntry* pEntry = mEntries[idx];
162        if (!pEntry->getDeleted() &&
163            strcmp(fileName, pEntry->getFileName()) == 0)
164        {
165            return pEntry;
166        }
167    }
168
169    return NULL;
170}
171
172/*
173 * Empty the mEntries vector.
174 */
175void ZipFile::discardEntries(void)
176{
177    int count = mEntries.size();
178
179    while (--count >= 0)
180        delete mEntries[count];
181
182    mEntries.clear();
183}
184
185
186/*
187 * Find the central directory and read the contents.
188 *
189 * The fun thing about ZIP archives is that they may or may not be
190 * readable from start to end.  In some cases, notably for archives
191 * that were written to stdout, the only length information is in the
192 * central directory at the end of the file.
193 *
194 * Of course, the central directory can be followed by a variable-length
195 * comment field, so we have to scan through it backwards.  The comment
196 * is at most 64K, plus we have 18 bytes for the end-of-central-dir stuff
197 * itself, plus apparently sometimes people throw random junk on the end
198 * just for the fun of it.
199 *
200 * This is all a little wobbly.  If the wrong value ends up in the EOCD
201 * area, we're hosed.  This appears to be the way that everbody handles
202 * it though, so we're in pretty good company if this fails.
203 */
204status_t ZipFile::readCentralDir(void)
205{
206    status_t result = NO_ERROR;
207    unsigned char* buf = NULL;
208    off_t fileLength, seekStart;
209    long readAmount;
210    int i;
211
212    fseek(mZipFp, 0, SEEK_END);
213    fileLength = ftell(mZipFp);
214    rewind(mZipFp);
215
216    /* too small to be a ZIP archive? */
217    if (fileLength < EndOfCentralDir::kEOCDLen) {
218        ALOGD("Length is %ld -- too small\n", (long)fileLength);
219        result = INVALID_OPERATION;
220        goto bail;
221    }
222
223    buf = new unsigned char[EndOfCentralDir::kMaxEOCDSearch];
224    if (buf == NULL) {
225        ALOGD("Failure allocating %d bytes for EOCD search",
226             EndOfCentralDir::kMaxEOCDSearch);
227        result = NO_MEMORY;
228        goto bail;
229    }
230
231    if (fileLength > EndOfCentralDir::kMaxEOCDSearch) {
232        seekStart = fileLength - EndOfCentralDir::kMaxEOCDSearch;
233        readAmount = EndOfCentralDir::kMaxEOCDSearch;
234    } else {
235        seekStart = 0;
236        readAmount = (long) fileLength;
237    }
238    if (fseek(mZipFp, seekStart, SEEK_SET) != 0) {
239        ALOGD("Failure seeking to end of zip at %ld", (long) seekStart);
240        result = UNKNOWN_ERROR;
241        goto bail;
242    }
243
244    /* read the last part of the file into the buffer */
245    if (fread(buf, 1, readAmount, mZipFp) != (size_t) readAmount) {
246        ALOGD("short file? wanted %ld\n", readAmount);
247        result = UNKNOWN_ERROR;
248        goto bail;
249    }
250
251    /* find the end-of-central-dir magic */
252    for (i = readAmount - 4; i >= 0; i--) {
253        if (buf[i] == 0x50 &&
254            ZipEntry::getLongLE(&buf[i]) == EndOfCentralDir::kSignature)
255        {
256            ALOGV("+++ Found EOCD at buf+%d\n", i);
257            break;
258        }
259    }
260    if (i < 0) {
261        ALOGD("EOCD not found, not Zip\n");
262        result = INVALID_OPERATION;
263        goto bail;
264    }
265
266    /* extract eocd values */
267    result = mEOCD.readBuf(buf + i, readAmount - i);
268    if (result != NO_ERROR) {
269        ALOGD("Failure reading %ld bytes of EOCD values", readAmount - i);
270        goto bail;
271    }
272    //mEOCD.dump();
273
274    if (mEOCD.mDiskNumber != 0 || mEOCD.mDiskWithCentralDir != 0 ||
275        mEOCD.mNumEntries != mEOCD.mTotalNumEntries)
276    {
277        ALOGD("Archive spanning not supported\n");
278        result = INVALID_OPERATION;
279        goto bail;
280    }
281
282    /*
283     * So far so good.  "mCentralDirSize" is the size in bytes of the
284     * central directory, so we can just seek back that far to find it.
285     * We can also seek forward mCentralDirOffset bytes from the
286     * start of the file.
287     *
288     * We're not guaranteed to have the rest of the central dir in the
289     * buffer, nor are we guaranteed that the central dir will have any
290     * sort of convenient size.  We need to skip to the start of it and
291     * read the header, then the other goodies.
292     *
293     * The only thing we really need right now is the file comment, which
294     * we're hoping to preserve.
295     */
296    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
297        ALOGD("Failure seeking to central dir offset %ld\n",
298             mEOCD.mCentralDirOffset);
299        result = UNKNOWN_ERROR;
300        goto bail;
301    }
302
303    /*
304     * Loop through and read the central dir entries.
305     */
306    ALOGV("Scanning %d entries...\n", mEOCD.mTotalNumEntries);
307    int entry;
308    for (entry = 0; entry < mEOCD.mTotalNumEntries; entry++) {
309        ZipEntry* pEntry = new ZipEntry;
310
311        result = pEntry->initFromCDE(mZipFp);
312        if (result != NO_ERROR) {
313            ALOGD("initFromCDE failed\n");
314            delete pEntry;
315            goto bail;
316        }
317
318        mEntries.add(pEntry);
319    }
320
321
322    /*
323     * If all went well, we should now be back at the EOCD.
324     */
325    {
326        unsigned char checkBuf[4];
327        if (fread(checkBuf, 1, 4, mZipFp) != 4) {
328            ALOGD("EOCD check read failed\n");
329            result = INVALID_OPERATION;
330            goto bail;
331        }
332        if (ZipEntry::getLongLE(checkBuf) != EndOfCentralDir::kSignature) {
333            ALOGD("EOCD read check failed\n");
334            result = UNKNOWN_ERROR;
335            goto bail;
336        }
337        ALOGV("+++ EOCD read check passed\n");
338    }
339
340bail:
341    delete[] buf;
342    return result;
343}
344
345
346/*
347 * Add a new file to the archive.
348 *
349 * This requires creating and populating a ZipEntry structure, and copying
350 * the data into the file at the appropriate position.  The "appropriate
351 * position" is the current location of the central directory, which we
352 * casually overwrite (we can put it back later).
353 *
354 * If we were concerned about safety, we would want to make all changes
355 * in a temp file and then overwrite the original after everything was
356 * safely written.  Not really a concern for us.
357 */
358status_t ZipFile::addCommon(const char* fileName, const void* data, size_t size,
359    const char* storageName, int sourceType, int compressionMethod,
360    ZipEntry** ppEntry)
361{
362    ZipEntry* pEntry = NULL;
363    status_t result = NO_ERROR;
364    long lfhPosn, startPosn, endPosn, uncompressedLen;
365    FILE* inputFp = NULL;
366    unsigned long crc;
367    time_t modWhen;
368
369    if (mReadOnly)
370        return INVALID_OPERATION;
371
372    assert(compressionMethod == ZipEntry::kCompressDeflated ||
373           compressionMethod == ZipEntry::kCompressStored);
374
375    /* make sure we're in a reasonable state */
376    assert(mZipFp != NULL);
377    assert(mEntries.size() == mEOCD.mTotalNumEntries);
378
379    /* make sure it doesn't already exist */
380    if (getEntryByName(storageName) != NULL)
381        return ALREADY_EXISTS;
382
383    if (!data) {
384        inputFp = fopen(fileName, FILE_OPEN_RO);
385        if (inputFp == NULL)
386            return errnoToStatus(errno);
387    }
388
389    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
390        result = UNKNOWN_ERROR;
391        goto bail;
392    }
393
394    pEntry = new ZipEntry;
395    pEntry->initNew(storageName, NULL);
396
397    /*
398     * From here on out, failures are more interesting.
399     */
400    mNeedCDRewrite = true;
401
402    /*
403     * Write the LFH, even though it's still mostly blank.  We need it
404     * as a place-holder.  In theory the LFH isn't necessary, but in
405     * practice some utilities demand it.
406     */
407    lfhPosn = ftell(mZipFp);
408    pEntry->mLFH.write(mZipFp);
409    startPosn = ftell(mZipFp);
410
411    /*
412     * Copy the data in, possibly compressing it as we go.
413     */
414    if (sourceType == ZipEntry::kCompressStored) {
415        if (compressionMethod == ZipEntry::kCompressDeflated) {
416            bool failed = false;
417            result = compressFpToFp(mZipFp, inputFp, data, size, &crc);
418            if (result != NO_ERROR) {
419                ALOGD("compression failed, storing\n");
420                failed = true;
421            } else {
422                /*
423                 * Make sure it has compressed "enough".  This probably ought
424                 * to be set through an API call, but I don't expect our
425                 * criteria to change over time.
426                 */
427                long src = inputFp ? ftell(inputFp) : size;
428                long dst = ftell(mZipFp) - startPosn;
429                if (dst + (dst / 10) > src) {
430                    ALOGD("insufficient compression (src=%ld dst=%ld), storing\n",
431                        src, dst);
432                    failed = true;
433                }
434            }
435
436            if (failed) {
437                compressionMethod = ZipEntry::kCompressStored;
438                if (inputFp) rewind(inputFp);
439                fseek(mZipFp, startPosn, SEEK_SET);
440                /* fall through to kCompressStored case */
441            }
442        }
443        /* handle "no compression" request, or failed compression from above */
444        if (compressionMethod == ZipEntry::kCompressStored) {
445            if (inputFp) {
446                result = copyFpToFp(mZipFp, inputFp, &crc);
447            } else {
448                result = copyDataToFp(mZipFp, data, size, &crc);
449            }
450            if (result != NO_ERROR) {
451                // don't need to truncate; happens in CDE rewrite
452                ALOGD("failed copying data in\n");
453                goto bail;
454            }
455        }
456
457        // currently seeked to end of file
458        uncompressedLen = inputFp ? ftell(inputFp) : size;
459    } else if (sourceType == ZipEntry::kCompressDeflated) {
460        /* we should support uncompressed-from-compressed, but it's not
461         * important right now */
462        assert(compressionMethod == ZipEntry::kCompressDeflated);
463
464        bool scanResult;
465        int method;
466        long compressedLen;
467
468        scanResult = ZipUtils::examineGzip(inputFp, &method, &uncompressedLen,
469                        &compressedLen, &crc);
470        if (!scanResult || method != ZipEntry::kCompressDeflated) {
471            ALOGD("this isn't a deflated gzip file?");
472            result = UNKNOWN_ERROR;
473            goto bail;
474        }
475
476        result = copyPartialFpToFp(mZipFp, inputFp, compressedLen, NULL);
477        if (result != NO_ERROR) {
478            ALOGD("failed copying gzip data in\n");
479            goto bail;
480        }
481    } else {
482        assert(false);
483        result = UNKNOWN_ERROR;
484        goto bail;
485    }
486
487    /*
488     * We could write the "Data Descriptor", but there doesn't seem to
489     * be any point since we're going to go back and write the LFH.
490     *
491     * Update file offsets.
492     */
493    endPosn = ftell(mZipFp);            // seeked to end of compressed data
494
495    /*
496     * Success!  Fill out new values.
497     */
498    pEntry->setDataInfo(uncompressedLen, endPosn - startPosn, crc,
499        compressionMethod);
500    modWhen = getModTime(inputFp ? fileno(inputFp) : fileno(mZipFp));
501    pEntry->setModWhen(modWhen);
502    pEntry->setLFHOffset(lfhPosn);
503    mEOCD.mNumEntries++;
504    mEOCD.mTotalNumEntries++;
505    mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
506    mEOCD.mCentralDirOffset = endPosn;
507
508    /*
509     * Go back and write the LFH.
510     */
511    if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
512        result = UNKNOWN_ERROR;
513        goto bail;
514    }
515    pEntry->mLFH.write(mZipFp);
516
517    /*
518     * Add pEntry to the list.
519     */
520    mEntries.add(pEntry);
521    if (ppEntry != NULL)
522        *ppEntry = pEntry;
523    pEntry = NULL;
524
525bail:
526    if (inputFp != NULL)
527        fclose(inputFp);
528    delete pEntry;
529    return result;
530}
531
532/*
533 * Add an entry by copying it from another zip file.  If "padding" is
534 * nonzero, the specified number of bytes will be added to the "extra"
535 * field in the header.
536 *
537 * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
538 */
539status_t ZipFile::add(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
540    int padding, ZipEntry** ppEntry)
541{
542    ZipEntry* pEntry = NULL;
543    status_t result;
544    long lfhPosn, endPosn;
545
546    if (mReadOnly)
547        return INVALID_OPERATION;
548
549    /* make sure we're in a reasonable state */
550    assert(mZipFp != NULL);
551    assert(mEntries.size() == mEOCD.mTotalNumEntries);
552
553    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
554        result = UNKNOWN_ERROR;
555        goto bail;
556    }
557
558    pEntry = new ZipEntry;
559    if (pEntry == NULL) {
560        result = NO_MEMORY;
561        goto bail;
562    }
563
564    result = pEntry->initFromExternal(pSourceZip, pSourceEntry);
565    if (result != NO_ERROR)
566        goto bail;
567    if (padding != 0) {
568        result = pEntry->addPadding(padding);
569        if (result != NO_ERROR)
570            goto bail;
571    }
572
573    /*
574     * From here on out, failures are more interesting.
575     */
576    mNeedCDRewrite = true;
577
578    /*
579     * Write the LFH.  Since we're not recompressing the data, we already
580     * have all of the fields filled out.
581     */
582    lfhPosn = ftell(mZipFp);
583    pEntry->mLFH.write(mZipFp);
584
585    /*
586     * Copy the data over.
587     *
588     * If the "has data descriptor" flag is set, we want to copy the DD
589     * fields as well.  This is a fixed-size area immediately following
590     * the data.
591     */
592    if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
593    {
594        result = UNKNOWN_ERROR;
595        goto bail;
596    }
597
598    off_t copyLen;
599    copyLen = pSourceEntry->getCompressedLen();
600    if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
601        copyLen += ZipEntry::kDataDescriptorLen;
602
603    if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
604        != NO_ERROR)
605    {
606        ALOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
607        result = UNKNOWN_ERROR;
608        goto bail;
609    }
610
611    /*
612     * Update file offsets.
613     */
614    endPosn = ftell(mZipFp);
615
616    /*
617     * Success!  Fill out new values.
618     */
619    pEntry->setLFHOffset(lfhPosn);      // sets mCDE.mLocalHeaderRelOffset
620    mEOCD.mNumEntries++;
621    mEOCD.mTotalNumEntries++;
622    mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
623    mEOCD.mCentralDirOffset = endPosn;
624
625    /*
626     * Add pEntry to the list.
627     */
628    mEntries.add(pEntry);
629    if (ppEntry != NULL)
630        *ppEntry = pEntry;
631    pEntry = NULL;
632
633    result = NO_ERROR;
634
635bail:
636    delete pEntry;
637    return result;
638}
639
640/*
641 * Copy all of the bytes in "src" to "dst".
642 *
643 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
644 * will be seeked immediately past the data.
645 */
646status_t ZipFile::copyFpToFp(FILE* dstFp, FILE* srcFp, unsigned long* pCRC32)
647{
648    unsigned char tmpBuf[32768];
649    size_t count;
650
651    *pCRC32 = crc32(0L, Z_NULL, 0);
652
653    while (1) {
654        count = fread(tmpBuf, 1, sizeof(tmpBuf), srcFp);
655        if (ferror(srcFp) || ferror(dstFp))
656            return errnoToStatus(errno);
657        if (count == 0)
658            break;
659
660        *pCRC32 = crc32(*pCRC32, tmpBuf, count);
661
662        if (fwrite(tmpBuf, 1, count, dstFp) != count) {
663            ALOGD("fwrite %d bytes failed\n", (int) count);
664            return UNKNOWN_ERROR;
665        }
666    }
667
668    return NO_ERROR;
669}
670
671/*
672 * Copy all of the bytes in "src" to "dst".
673 *
674 * On exit, "dstFp" will be seeked immediately past the data.
675 */
676status_t ZipFile::copyDataToFp(FILE* dstFp,
677    const void* data, size_t size, unsigned long* pCRC32)
678{
679    size_t count;
680
681    *pCRC32 = crc32(0L, Z_NULL, 0);
682    if (size > 0) {
683        *pCRC32 = crc32(*pCRC32, (const unsigned char*)data, size);
684        if (fwrite(data, 1, size, dstFp) != size) {
685            ALOGD("fwrite %d bytes failed\n", (int) size);
686            return UNKNOWN_ERROR;
687        }
688    }
689
690    return NO_ERROR;
691}
692
693/*
694 * Copy some of the bytes in "src" to "dst".
695 *
696 * If "pCRC32" is NULL, the CRC will not be computed.
697 *
698 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
699 * will be seeked immediately past the data just written.
700 */
701status_t ZipFile::copyPartialFpToFp(FILE* dstFp, FILE* srcFp, long length,
702    unsigned long* pCRC32)
703{
704    unsigned char tmpBuf[32768];
705    size_t count;
706
707    if (pCRC32 != NULL)
708        *pCRC32 = crc32(0L, Z_NULL, 0);
709
710    while (length) {
711        long readSize;
712
713        readSize = sizeof(tmpBuf);
714        if (readSize > length)
715            readSize = length;
716
717        count = fread(tmpBuf, 1, readSize, srcFp);
718        if ((long) count != readSize) {     // error or unexpected EOF
719            ALOGD("fread %d bytes failed\n", (int) readSize);
720            return UNKNOWN_ERROR;
721        }
722
723        if (pCRC32 != NULL)
724            *pCRC32 = crc32(*pCRC32, tmpBuf, count);
725
726        if (fwrite(tmpBuf, 1, count, dstFp) != count) {
727            ALOGD("fwrite %d bytes failed\n", (int) count);
728            return UNKNOWN_ERROR;
729        }
730
731        length -= readSize;
732    }
733
734    return NO_ERROR;
735}
736
737/*
738 * Compress all of the data in "srcFp" and write it to "dstFp".
739 *
740 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
741 * will be seeked immediately past the compressed data.
742 */
743status_t ZipFile::compressFpToFp(FILE* dstFp, FILE* srcFp,
744    const void* data, size_t size, unsigned long* pCRC32)
745{
746    status_t result = NO_ERROR;
747    const size_t kBufSize = 32768;
748    unsigned char* inBuf = NULL;
749    unsigned char* outBuf = NULL;
750    z_stream zstream;
751    bool atEof = false;     // no feof() aviailable yet
752    unsigned long crc;
753    int zerr;
754
755    /*
756     * Create an input buffer and an output buffer.
757     */
758    inBuf = new unsigned char[kBufSize];
759    outBuf = new unsigned char[kBufSize];
760    if (inBuf == NULL || outBuf == NULL) {
761        result = NO_MEMORY;
762        goto bail;
763    }
764
765    /*
766     * Initialize the zlib stream.
767     */
768    memset(&zstream, 0, sizeof(zstream));
769    zstream.zalloc = Z_NULL;
770    zstream.zfree = Z_NULL;
771    zstream.opaque = Z_NULL;
772    zstream.next_in = NULL;
773    zstream.avail_in = 0;
774    zstream.next_out = outBuf;
775    zstream.avail_out = kBufSize;
776    zstream.data_type = Z_UNKNOWN;
777
778    zerr = deflateInit2(&zstream, Z_BEST_COMPRESSION,
779        Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL, Z_DEFAULT_STRATEGY);
780    if (zerr != Z_OK) {
781        result = UNKNOWN_ERROR;
782        if (zerr == Z_VERSION_ERROR) {
783            ALOGE("Installed zlib is not compatible with linked version (%s)\n",
784                ZLIB_VERSION);
785        } else {
786            ALOGD("Call to deflateInit2 failed (zerr=%d)\n", zerr);
787        }
788        goto bail;
789    }
790
791    crc = crc32(0L, Z_NULL, 0);
792
793    /*
794     * Loop while we have data.
795     */
796    do {
797        size_t getSize;
798        int flush;
799
800        /* only read if the input buffer is empty */
801        if (zstream.avail_in == 0 && !atEof) {
802            ALOGV("+++ reading %d bytes\n", (int)kBufSize);
803            if (data) {
804                getSize = size > kBufSize ? kBufSize : size;
805                memcpy(inBuf, data, getSize);
806                data = ((const char*)data) + getSize;
807                size -= getSize;
808            } else {
809                getSize = fread(inBuf, 1, kBufSize, srcFp);
810                if (ferror(srcFp)) {
811                    ALOGD("deflate read failed (errno=%d)\n", errno);
812                    goto z_bail;
813                }
814            }
815            if (getSize < kBufSize) {
816                ALOGV("+++  got %d bytes, EOF reached\n",
817                    (int)getSize);
818                atEof = true;
819            }
820
821            crc = crc32(crc, inBuf, getSize);
822
823            zstream.next_in = inBuf;
824            zstream.avail_in = getSize;
825        }
826
827        if (atEof)
828            flush = Z_FINISH;       /* tell zlib that we're done */
829        else
830            flush = Z_NO_FLUSH;     /* more to come! */
831
832        zerr = deflate(&zstream, flush);
833        if (zerr != Z_OK && zerr != Z_STREAM_END) {
834            ALOGD("zlib deflate call failed (zerr=%d)\n", zerr);
835            result = UNKNOWN_ERROR;
836            goto z_bail;
837        }
838
839        /* write when we're full or when we're done */
840        if (zstream.avail_out == 0 ||
841            (zerr == Z_STREAM_END && zstream.avail_out != (uInt) kBufSize))
842        {
843            ALOGV("+++ writing %d bytes\n", (int) (zstream.next_out - outBuf));
844            if (fwrite(outBuf, 1, zstream.next_out - outBuf, dstFp) !=
845                (size_t)(zstream.next_out - outBuf))
846            {
847                ALOGD("write %d failed in deflate\n",
848                    (int) (zstream.next_out - outBuf));
849                goto z_bail;
850            }
851
852            zstream.next_out = outBuf;
853            zstream.avail_out = kBufSize;
854        }
855    } while (zerr == Z_OK);
856
857    assert(zerr == Z_STREAM_END);       /* other errors should've been caught */
858
859    *pCRC32 = crc;
860
861z_bail:
862    deflateEnd(&zstream);        /* free up any allocated structures */
863
864bail:
865    delete[] inBuf;
866    delete[] outBuf;
867
868    return result;
869}
870
871/*
872 * Mark an entry as deleted.
873 *
874 * We will eventually need to crunch the file down, but if several files
875 * are being removed (perhaps as part of an "update" process) we can make
876 * things considerably faster by deferring the removal to "flush" time.
877 */
878status_t ZipFile::remove(ZipEntry* pEntry)
879{
880    /*
881     * Should verify that pEntry is actually part of this archive, and
882     * not some stray ZipEntry from a different file.
883     */
884
885    /* mark entry as deleted, and mark archive as dirty */
886    pEntry->setDeleted();
887    mNeedCDRewrite = true;
888    return NO_ERROR;
889}
890
891/*
892 * Flush any pending writes.
893 *
894 * In particular, this will crunch out deleted entries, and write the
895 * Central Directory and EOCD if we have stomped on them.
896 */
897status_t ZipFile::flush(void)
898{
899    status_t result = NO_ERROR;
900    long eocdPosn;
901    int i, count;
902
903    if (mReadOnly)
904        return INVALID_OPERATION;
905    if (!mNeedCDRewrite)
906        return NO_ERROR;
907
908    assert(mZipFp != NULL);
909
910    result = crunchArchive();
911    if (result != NO_ERROR)
912        return result;
913
914    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0)
915        return UNKNOWN_ERROR;
916
917    count = mEntries.size();
918    for (i = 0; i < count; i++) {
919        ZipEntry* pEntry = mEntries[i];
920        pEntry->mCDE.write(mZipFp);
921    }
922
923    eocdPosn = ftell(mZipFp);
924    mEOCD.mCentralDirSize = eocdPosn - mEOCD.mCentralDirOffset;
925
926    mEOCD.write(mZipFp);
927
928    /*
929     * If we had some stuff bloat up during compression and get replaced
930     * with plain files, or if we deleted some entries, there's a lot
931     * of wasted space at the end of the file.  Remove it now.
932     */
933    if (ftruncate(fileno(mZipFp), ftell(mZipFp)) != 0) {
934        ALOGW("ftruncate failed %ld: %s\n", ftell(mZipFp), strerror(errno));
935        // not fatal
936    }
937
938    /* should we clear the "newly added" flag in all entries now? */
939
940    mNeedCDRewrite = false;
941    return NO_ERROR;
942}
943
944/*
945 * Crunch deleted files out of an archive by shifting the later files down.
946 *
947 * Because we're not using a temp file, we do the operation inside the
948 * current file.
949 */
950status_t ZipFile::crunchArchive(void)
951{
952    status_t result = NO_ERROR;
953    int i, count;
954    long delCount, adjust;
955
956#if 0
957    printf("CONTENTS:\n");
958    for (i = 0; i < (int) mEntries.size(); i++) {
959        printf(" %d: lfhOff=%ld del=%d\n",
960            i, mEntries[i]->getLFHOffset(), mEntries[i]->getDeleted());
961    }
962    printf("  END is %ld\n", (long) mEOCD.mCentralDirOffset);
963#endif
964
965    /*
966     * Roll through the set of files, shifting them as appropriate.  We
967     * could probably get a slight performance improvement by sliding
968     * multiple files down at once (because we could use larger reads
969     * when operating on batches of small files), but it's not that useful.
970     */
971    count = mEntries.size();
972    delCount = adjust = 0;
973    for (i = 0; i < count; i++) {
974        ZipEntry* pEntry = mEntries[i];
975        long span;
976
977        if (pEntry->getLFHOffset() != 0) {
978            long nextOffset;
979
980            /* Get the length of this entry by finding the offset
981             * of the next entry.  Directory entries don't have
982             * file offsets, so we need to find the next non-directory
983             * entry.
984             */
985            nextOffset = 0;
986            for (int ii = i+1; nextOffset == 0 && ii < count; ii++)
987                nextOffset = mEntries[ii]->getLFHOffset();
988            if (nextOffset == 0)
989                nextOffset = mEOCD.mCentralDirOffset;
990            span = nextOffset - pEntry->getLFHOffset();
991
992            assert(span >= ZipEntry::LocalFileHeader::kLFHLen);
993        } else {
994            /* This is a directory entry.  It doesn't have
995             * any actual file contents, so there's no need to
996             * move anything.
997             */
998            span = 0;
999        }
1000
1001        //printf("+++ %d: off=%ld span=%ld del=%d [count=%d]\n",
1002        //    i, pEntry->getLFHOffset(), span, pEntry->getDeleted(), count);
1003
1004        if (pEntry->getDeleted()) {
1005            adjust += span;
1006            delCount++;
1007
1008            delete pEntry;
1009            mEntries.removeAt(i);
1010
1011            /* adjust loop control */
1012            count--;
1013            i--;
1014        } else if (span != 0 && adjust > 0) {
1015            /* shuffle this entry back */
1016            //printf("+++ Shuffling '%s' back %ld\n",
1017            //    pEntry->getFileName(), adjust);
1018            result = filemove(mZipFp, pEntry->getLFHOffset() - adjust,
1019                        pEntry->getLFHOffset(), span);
1020            if (result != NO_ERROR) {
1021                /* this is why you use a temp file */
1022                ALOGE("error during crunch - archive is toast\n");
1023                return result;
1024            }
1025
1026            pEntry->setLFHOffset(pEntry->getLFHOffset() - adjust);
1027        }
1028    }
1029
1030    /*
1031     * Fix EOCD info.  We have to wait until the end to do some of this
1032     * because we use mCentralDirOffset to determine "span" for the
1033     * last entry.
1034     */
1035    mEOCD.mCentralDirOffset -= adjust;
1036    mEOCD.mNumEntries -= delCount;
1037    mEOCD.mTotalNumEntries -= delCount;
1038    mEOCD.mCentralDirSize = 0;  // mark invalid; set by flush()
1039
1040    assert(mEOCD.mNumEntries == mEOCD.mTotalNumEntries);
1041    assert(mEOCD.mNumEntries == count);
1042
1043    return result;
1044}
1045
1046/*
1047 * Works like memmove(), but on pieces of a file.
1048 */
1049status_t ZipFile::filemove(FILE* fp, off_t dst, off_t src, size_t n)
1050{
1051    if (dst == src || n <= 0)
1052        return NO_ERROR;
1053
1054    unsigned char readBuf[32768];
1055
1056    if (dst < src) {
1057        /* shift stuff toward start of file; must read from start */
1058        while (n != 0) {
1059            size_t getSize = sizeof(readBuf);
1060            if (getSize > n)
1061                getSize = n;
1062
1063            if (fseek(fp, (long) src, SEEK_SET) != 0) {
1064                ALOGD("filemove src seek %ld failed\n", (long) src);
1065                return UNKNOWN_ERROR;
1066            }
1067
1068            if (fread(readBuf, 1, getSize, fp) != getSize) {
1069                ALOGD("filemove read %ld off=%ld failed\n",
1070                    (long) getSize, (long) src);
1071                return UNKNOWN_ERROR;
1072            }
1073
1074            if (fseek(fp, (long) dst, SEEK_SET) != 0) {
1075                ALOGD("filemove dst seek %ld failed\n", (long) dst);
1076                return UNKNOWN_ERROR;
1077            }
1078
1079            if (fwrite(readBuf, 1, getSize, fp) != getSize) {
1080                ALOGD("filemove write %ld off=%ld failed\n",
1081                    (long) getSize, (long) dst);
1082                return UNKNOWN_ERROR;
1083            }
1084
1085            src += getSize;
1086            dst += getSize;
1087            n -= getSize;
1088        }
1089    } else {
1090        /* shift stuff toward end of file; must read from end */
1091        assert(false);      // write this someday, maybe
1092        return UNKNOWN_ERROR;
1093    }
1094
1095    return NO_ERROR;
1096}
1097
1098
1099/*
1100 * Get the modification time from a file descriptor.
1101 */
1102time_t ZipFile::getModTime(int fd)
1103{
1104    struct stat sb;
1105
1106    if (fstat(fd, &sb) < 0) {
1107        ALOGD("HEY: fstat on fd %d failed\n", fd);
1108        return (time_t) -1;
1109    }
1110
1111    return sb.st_mtime;
1112}
1113
1114
#if 0       /* this is a bad idea */
/*
 * Hand back a dup()'d descriptor for the open Zip file.
 *
 * Refused with INVALID_OPERATION unless the archive is read-only: a
 * read-write archive can be in an uncertain state between calls to
 * flush(), so the duplicate should only ever be used for reads.  If
 * dup() fails, errno is logged and dup()'s negative result is returned.
 */
int ZipFile::getZipFd(void) const
{
    if (!mReadOnly)
        return INVALID_OPERATION;
    assert(mZipFp != NULL);

    const int dupFd = dup(fileno(mZipFp));
    if (dupFd < 0) {
        ALOGD("didn't work, errno=%d\n", errno);
    }
    return dupFd;
}
#endif
1138
1139
#if 0
/*
 * Expand data.
 *
 * Compiled out.  The stub ignores both arguments and always returns
 * false; presumably "buf" was meant to be a caller-supplied destination
 * buffer (TODO confirm before reviving this).
 */
bool ZipFile::uncompress(const ZipEntry* pEntry, void* buf) const
{
    return false;
}
#endif
1149
1150// free the memory when you're done
1151void* ZipFile::uncompress(const ZipEntry* entry)
1152{
1153    size_t unlen = entry->getUncompressedLen();
1154    size_t clen = entry->getCompressedLen();
1155
1156    void* buf = malloc(unlen);
1157    if (buf == NULL) {
1158        return NULL;
1159    }
1160
1161    fseek(mZipFp, 0, SEEK_SET);
1162
1163    off_t offset = entry->getFileOffset();
1164    if (fseek(mZipFp, offset, SEEK_SET) != 0) {
1165        goto bail;
1166    }
1167
1168    switch (entry->getCompressionMethod())
1169    {
1170        case ZipEntry::kCompressStored: {
1171            ssize_t amt = fread(buf, 1, unlen, mZipFp);
1172            if (amt != (ssize_t)unlen) {
1173                goto bail;
1174            }
1175#if 0
1176            printf("data...\n");
1177            const unsigned char* p = (unsigned char*)buf;
1178            const unsigned char* end = p+unlen;
1179            for (int i=0; i<32 && p < end; i++) {
1180                printf("0x%08x ", (int)(offset+(i*0x10)));
1181                for (int j=0; j<0x10 && p < end; j++) {
1182                    printf(" %02x", *p);
1183                    p++;
1184                }
1185                printf("\n");
1186            }
1187#endif
1188
1189            }
1190            break;
1191        case ZipEntry::kCompressDeflated: {
1192            if (!ZipUtils::inflateToBuffer(mZipFp, buf, unlen, clen)) {
1193                goto bail;
1194            }
1195            }
1196            break;
1197        default:
1198            goto bail;
1199    }
1200    return buf;
1201
1202bail:
1203    free(buf);
1204    return NULL;
1205}
1206
1207
1208/*
1209 * ===========================================================================
1210 *      ZipFile::EndOfCentralDir
1211 * ===========================================================================
1212 */
1213
1214/*
1215 * Read the end-of-central-dir fields.
1216 *
1217 * "buf" should be positioned at the EOCD signature, and should contain
1218 * the entire EOCD area including the comment.
1219 */
1220status_t ZipFile::EndOfCentralDir::readBuf(const unsigned char* buf, int len)
1221{
1222    /* don't allow re-use */
1223    assert(mComment == NULL);
1224
1225    if (len < kEOCDLen) {
1226        /* looks like ZIP file got truncated */
1227        ALOGD(" Zip EOCD: expected >= %d bytes, found %d\n",
1228            kEOCDLen, len);
1229        return INVALID_OPERATION;
1230    }
1231
1232    /* this should probably be an assert() */
1233    if (ZipEntry::getLongLE(&buf[0x00]) != kSignature)
1234        return UNKNOWN_ERROR;
1235
1236    mDiskNumber = ZipEntry::getShortLE(&buf[0x04]);
1237    mDiskWithCentralDir = ZipEntry::getShortLE(&buf[0x06]);
1238    mNumEntries = ZipEntry::getShortLE(&buf[0x08]);
1239    mTotalNumEntries = ZipEntry::getShortLE(&buf[0x0a]);
1240    mCentralDirSize = ZipEntry::getLongLE(&buf[0x0c]);
1241    mCentralDirOffset = ZipEntry::getLongLE(&buf[0x10]);
1242    mCommentLen = ZipEntry::getShortLE(&buf[0x14]);
1243
1244    // TODO: validate mCentralDirOffset
1245
1246    if (mCommentLen > 0) {
1247        if (kEOCDLen + mCommentLen > len) {
1248            ALOGD("EOCD(%d) + comment(%d) exceeds len (%d)\n",
1249                kEOCDLen, mCommentLen, len);
1250            return UNKNOWN_ERROR;
1251        }
1252        mComment = new unsigned char[mCommentLen];
1253        memcpy(mComment, buf + kEOCDLen, mCommentLen);
1254    }
1255
1256    return NO_ERROR;
1257}
1258
1259/*
1260 * Write an end-of-central-directory section.
1261 */
1262status_t ZipFile::EndOfCentralDir::write(FILE* fp)
1263{
1264    unsigned char buf[kEOCDLen];
1265
1266    ZipEntry::putLongLE(&buf[0x00], kSignature);
1267    ZipEntry::putShortLE(&buf[0x04], mDiskNumber);
1268    ZipEntry::putShortLE(&buf[0x06], mDiskWithCentralDir);
1269    ZipEntry::putShortLE(&buf[0x08], mNumEntries);
1270    ZipEntry::putShortLE(&buf[0x0a], mTotalNumEntries);
1271    ZipEntry::putLongLE(&buf[0x0c], mCentralDirSize);
1272    ZipEntry::putLongLE(&buf[0x10], mCentralDirOffset);
1273    ZipEntry::putShortLE(&buf[0x14], mCommentLen);
1274
1275    if (fwrite(buf, 1, kEOCDLen, fp) != kEOCDLen)
1276        return UNKNOWN_ERROR;
1277    if (mCommentLen > 0) {
1278        assert(mComment != NULL);
1279        if (fwrite(mComment, mCommentLen, 1, fp) != mCommentLen)
1280            return UNKNOWN_ERROR;
1281    }
1282
1283    return NO_ERROR;
1284}
1285
1286/*
1287 * Dump the contents of an EndOfCentralDir object.
1288 */
1289void ZipFile::EndOfCentralDir::dump(void) const
1290{
1291    ALOGD(" EndOfCentralDir contents:\n");
1292    ALOGD("  diskNum=%u diskWCD=%u numEnt=%u totalNumEnt=%u\n",
1293        mDiskNumber, mDiskWithCentralDir, mNumEntries, mTotalNumEntries);
1294    ALOGD("  centDirSize=%lu centDirOff=%lu commentLen=%u\n",
1295        mCentralDirSize, mCentralDirOffset, mCommentLen);
1296}
1297
1298