ZipFile.cpp revision af1d7411825e589f09074c04bbbd80497b60e9e9
1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//
18// Access to Zip archives.
19//
20
21#define LOG_TAG "zip"
22
23#include <androidfw/ZipUtils.h>
24#include <utils/Log.h>
25
26#include "ZipFile.h"
27
28#include <zlib.h>
29#define DEF_MEM_LEVEL 8                // normally in zutil.h?
30
31#include "zopfli/deflate.h"
32
33#include <memory.h>
34#include <sys/stat.h>
35#include <errno.h>
36#include <assert.h>
37#include <inttypes.h>
38
39using namespace android;
40
/*
 * fopen() mode strings used throughout this file.  They are kept in one
 * place because some environments require the "b" flag while others
 * choke on it.
 */
#define FILE_OPEN_RO        "rb"
#define FILE_OPEN_RW        "r+b"
#define FILE_OPEN_RW_CREATE "w+b"
47
48/* should live somewhere else? */
49static status_t errnoToStatus(int err)
50{
51    if (err == ENOENT)
52        return NAME_NOT_FOUND;
53    else if (err == EACCES)
54        return PERMISSION_DENIED;
55    else
56        return UNKNOWN_ERROR;
57}
58
59/*
60 * Open a file and parse its guts.
61 */
62status_t ZipFile::open(const char* zipFileName, int flags)
63{
64    bool newArchive = false;
65
66    assert(mZipFp == NULL);     // no reopen
67
68    if ((flags & kOpenTruncate))
69        flags |= kOpenCreate;           // trunc implies create
70
71    if ((flags & kOpenReadOnly) && (flags & kOpenReadWrite))
72        return INVALID_OPERATION;       // not both
73    if (!((flags & kOpenReadOnly) || (flags & kOpenReadWrite)))
74        return INVALID_OPERATION;       // not neither
75    if ((flags & kOpenCreate) && !(flags & kOpenReadWrite))
76        return INVALID_OPERATION;       // create requires write
77
78    if (flags & kOpenTruncate) {
79        newArchive = true;
80    } else {
81        newArchive = (access(zipFileName, F_OK) != 0);
82        if (!(flags & kOpenCreate) && newArchive) {
83            /* not creating, must already exist */
84            ALOGD("File %s does not exist", zipFileName);
85            return NAME_NOT_FOUND;
86        }
87    }
88
89    /* open the file */
90    const char* openflags;
91    if (flags & kOpenReadWrite) {
92        if (newArchive)
93            openflags = FILE_OPEN_RW_CREATE;
94        else
95            openflags = FILE_OPEN_RW;
96    } else {
97        openflags = FILE_OPEN_RO;
98    }
99    mZipFp = fopen(zipFileName, openflags);
100    if (mZipFp == NULL) {
101        int err = errno;
102        ALOGD("fopen failed: %d\n", err);
103        return errnoToStatus(err);
104    }
105
106    status_t result;
107    if (!newArchive) {
108        /*
109         * Load the central directory.  If that fails, then this probably
110         * isn't a Zip archive.
111         */
112        result = readCentralDir();
113    } else {
114        /*
115         * Newly-created.  The EndOfCentralDir constructor actually
116         * sets everything to be the way we want it (all zeroes).  We
117         * set mNeedCDRewrite so that we create *something* if the
118         * caller doesn't add any files.  (We could also just unlink
119         * the file if it's brand new and nothing was added, but that's
120         * probably doing more than we really should -- the user might
121         * have a need for empty zip files.)
122         */
123        mNeedCDRewrite = true;
124        result = NO_ERROR;
125    }
126
127    if (flags & kOpenReadOnly)
128        mReadOnly = true;
129    else
130        assert(!mReadOnly);
131
132    return result;
133}
134
135/*
136 * Return the Nth entry in the archive.
137 */
138ZipEntry* ZipFile::getEntryByIndex(int idx) const
139{
140    if (idx < 0 || idx >= (int) mEntries.size())
141        return NULL;
142
143    return mEntries[idx];
144}
145
146/*
147 * Find an entry by name.
148 */
149ZipEntry* ZipFile::getEntryByName(const char* fileName) const
150{
151    /*
152     * Do a stupid linear string-compare search.
153     *
154     * There are various ways to speed this up, especially since it's rare
155     * to intermingle changes to the archive with "get by name" calls.  We
156     * don't want to sort the mEntries vector itself, however, because
157     * it's used to recreate the Central Directory.
158     *
159     * (Hash table works, parallel list of pointers in sorted order is good.)
160     */
161    int idx;
162
163    for (idx = mEntries.size()-1; idx >= 0; idx--) {
164        ZipEntry* pEntry = mEntries[idx];
165        if (!pEntry->getDeleted() &&
166            strcmp(fileName, pEntry->getFileName()) == 0)
167        {
168            return pEntry;
169        }
170    }
171
172    return NULL;
173}
174
175/*
176 * Empty the mEntries vector.
177 */
178void ZipFile::discardEntries(void)
179{
180    int count = mEntries.size();
181
182    while (--count >= 0)
183        delete mEntries[count];
184
185    mEntries.clear();
186}
187
188
189/*
190 * Find the central directory and read the contents.
191 *
192 * The fun thing about ZIP archives is that they may or may not be
193 * readable from start to end.  In some cases, notably for archives
194 * that were written to stdout, the only length information is in the
195 * central directory at the end of the file.
196 *
197 * Of course, the central directory can be followed by a variable-length
198 * comment field, so we have to scan through it backwards.  The comment
199 * is at most 64K, plus we have 18 bytes for the end-of-central-dir stuff
200 * itself, plus apparently sometimes people throw random junk on the end
201 * just for the fun of it.
202 *
203 * This is all a little wobbly.  If the wrong value ends up in the EOCD
204 * area, we're hosed.  This appears to be the way that everbody handles
205 * it though, so we're in pretty good company if this fails.
206 */
207status_t ZipFile::readCentralDir(void)
208{
209    status_t result = NO_ERROR;
210    uint8_t* buf = NULL;
211    off_t fileLength, seekStart;
212    long readAmount;
213    int i;
214
215    fseek(mZipFp, 0, SEEK_END);
216    fileLength = ftell(mZipFp);
217    rewind(mZipFp);
218
219    /* too small to be a ZIP archive? */
220    if (fileLength < EndOfCentralDir::kEOCDLen) {
221        ALOGD("Length is %ld -- too small\n", (long)fileLength);
222        result = INVALID_OPERATION;
223        goto bail;
224    }
225
226    buf = new uint8_t[EndOfCentralDir::kMaxEOCDSearch];
227    if (buf == NULL) {
228        ALOGD("Failure allocating %d bytes for EOCD search",
229             EndOfCentralDir::kMaxEOCDSearch);
230        result = NO_MEMORY;
231        goto bail;
232    }
233
234    if (fileLength > EndOfCentralDir::kMaxEOCDSearch) {
235        seekStart = fileLength - EndOfCentralDir::kMaxEOCDSearch;
236        readAmount = EndOfCentralDir::kMaxEOCDSearch;
237    } else {
238        seekStart = 0;
239        readAmount = (long) fileLength;
240    }
241    if (fseek(mZipFp, seekStart, SEEK_SET) != 0) {
242        ALOGD("Failure seeking to end of zip at %ld", (long) seekStart);
243        result = UNKNOWN_ERROR;
244        goto bail;
245    }
246
247    /* read the last part of the file into the buffer */
248    if (fread(buf, 1, readAmount, mZipFp) != (size_t) readAmount) {
249        ALOGD("short file? wanted %ld\n", readAmount);
250        result = UNKNOWN_ERROR;
251        goto bail;
252    }
253
254    /* find the end-of-central-dir magic */
255    for (i = readAmount - 4; i >= 0; i--) {
256        if (buf[i] == 0x50 &&
257            ZipEntry::getLongLE(&buf[i]) == EndOfCentralDir::kSignature)
258        {
259            ALOGV("+++ Found EOCD at buf+%d\n", i);
260            break;
261        }
262    }
263    if (i < 0) {
264        ALOGD("EOCD not found, not Zip\n");
265        result = INVALID_OPERATION;
266        goto bail;
267    }
268
269    /* extract eocd values */
270    result = mEOCD.readBuf(buf + i, readAmount - i);
271    if (result != NO_ERROR) {
272        ALOGD("Failure reading %ld bytes of EOCD values", readAmount - i);
273        goto bail;
274    }
275    //mEOCD.dump();
276
277    if (mEOCD.mDiskNumber != 0 || mEOCD.mDiskWithCentralDir != 0 ||
278        mEOCD.mNumEntries != mEOCD.mTotalNumEntries)
279    {
280        ALOGD("Archive spanning not supported\n");
281        result = INVALID_OPERATION;
282        goto bail;
283    }
284
285    /*
286     * So far so good.  "mCentralDirSize" is the size in bytes of the
287     * central directory, so we can just seek back that far to find it.
288     * We can also seek forward mCentralDirOffset bytes from the
289     * start of the file.
290     *
291     * We're not guaranteed to have the rest of the central dir in the
292     * buffer, nor are we guaranteed that the central dir will have any
293     * sort of convenient size.  We need to skip to the start of it and
294     * read the header, then the other goodies.
295     *
296     * The only thing we really need right now is the file comment, which
297     * we're hoping to preserve.
298     */
299    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
300        ALOGD("Failure seeking to central dir offset %" PRIu32 "\n",
301             mEOCD.mCentralDirOffset);
302        result = UNKNOWN_ERROR;
303        goto bail;
304    }
305
306    /*
307     * Loop through and read the central dir entries.
308     */
309    ALOGV("Scanning %" PRIu16 " entries...\n", mEOCD.mTotalNumEntries);
310    int entry;
311    for (entry = 0; entry < mEOCD.mTotalNumEntries; entry++) {
312        ZipEntry* pEntry = new ZipEntry;
313
314        result = pEntry->initFromCDE(mZipFp);
315        if (result != NO_ERROR) {
316            ALOGD("initFromCDE failed\n");
317            delete pEntry;
318            goto bail;
319        }
320
321        mEntries.add(pEntry);
322    }
323
324
325    /*
326     * If all went well, we should now be back at the EOCD.
327     */
328    {
329        uint8_t checkBuf[4];
330        if (fread(checkBuf, 1, 4, mZipFp) != 4) {
331            ALOGD("EOCD check read failed\n");
332            result = INVALID_OPERATION;
333            goto bail;
334        }
335        if (ZipEntry::getLongLE(checkBuf) != EndOfCentralDir::kSignature) {
336            ALOGD("EOCD read check failed\n");
337            result = UNKNOWN_ERROR;
338            goto bail;
339        }
340        ALOGV("+++ EOCD read check passed\n");
341    }
342
343bail:
344    delete[] buf;
345    return result;
346}
347
348
349/*
350 * Add a new file to the archive.
351 *
352 * This requires creating and populating a ZipEntry structure, and copying
353 * the data into the file at the appropriate position.  The "appropriate
354 * position" is the current location of the central directory, which we
355 * casually overwrite (we can put it back later).
356 *
357 * If we were concerned about safety, we would want to make all changes
358 * in a temp file and then overwrite the original after everything was
359 * safely written.  Not really a concern for us.
360 */
361status_t ZipFile::addCommon(const char* fileName, const void* data, size_t size,
362    const char* storageName, int sourceType, int compressionMethod,
363    ZipEntry** ppEntry)
364{
365    ZipEntry* pEntry = NULL;
366    status_t result = NO_ERROR;
367    long lfhPosn, startPosn, endPosn, uncompressedLen;
368    FILE* inputFp = NULL;
369    uint32_t crc;
370    time_t modWhen;
371
372    if (mReadOnly)
373        return INVALID_OPERATION;
374
375    assert(compressionMethod == ZipEntry::kCompressDeflated ||
376           compressionMethod == ZipEntry::kCompressStored);
377
378    /* make sure we're in a reasonable state */
379    assert(mZipFp != NULL);
380    assert(mEntries.size() == mEOCD.mTotalNumEntries);
381
382    /* make sure it doesn't already exist */
383    if (getEntryByName(storageName) != NULL)
384        return ALREADY_EXISTS;
385
386    if (!data) {
387        inputFp = fopen(fileName, FILE_OPEN_RO);
388        if (inputFp == NULL)
389            return errnoToStatus(errno);
390    }
391
392    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
393        result = UNKNOWN_ERROR;
394        goto bail;
395    }
396
397    pEntry = new ZipEntry;
398    pEntry->initNew(storageName, NULL);
399
400    /*
401     * From here on out, failures are more interesting.
402     */
403    mNeedCDRewrite = true;
404
405    /*
406     * Write the LFH, even though it's still mostly blank.  We need it
407     * as a place-holder.  In theory the LFH isn't necessary, but in
408     * practice some utilities demand it.
409     */
410    lfhPosn = ftell(mZipFp);
411    pEntry->mLFH.write(mZipFp);
412    startPosn = ftell(mZipFp);
413
414    /*
415     * Copy the data in, possibly compressing it as we go.
416     */
417    if (sourceType == ZipEntry::kCompressStored) {
418        if (compressionMethod == ZipEntry::kCompressDeflated) {
419            bool failed = false;
420            result = compressFpToFp(mZipFp, inputFp, data, size, &crc);
421            if (result != NO_ERROR) {
422                ALOGD("compression failed, storing\n");
423                failed = true;
424            } else {
425                /*
426                 * Make sure it has compressed "enough".  This probably ought
427                 * to be set through an API call, but I don't expect our
428                 * criteria to change over time.
429                 */
430                long src = inputFp ? ftell(inputFp) : size;
431                long dst = ftell(mZipFp) - startPosn;
432                if (dst + (dst / 10) > src) {
433                    ALOGD("insufficient compression (src=%ld dst=%ld), storing\n",
434                        src, dst);
435                    failed = true;
436                }
437            }
438
439            if (failed) {
440                compressionMethod = ZipEntry::kCompressStored;
441                if (inputFp) rewind(inputFp);
442                fseek(mZipFp, startPosn, SEEK_SET);
443                /* fall through to kCompressStored case */
444            }
445        }
446        /* handle "no compression" request, or failed compression from above */
447        if (compressionMethod == ZipEntry::kCompressStored) {
448            if (inputFp) {
449                result = copyFpToFp(mZipFp, inputFp, &crc);
450            } else {
451                result = copyDataToFp(mZipFp, data, size, &crc);
452            }
453            if (result != NO_ERROR) {
454                // don't need to truncate; happens in CDE rewrite
455                ALOGD("failed copying data in\n");
456                goto bail;
457            }
458        }
459
460        // currently seeked to end of file
461        uncompressedLen = inputFp ? ftell(inputFp) : size;
462    } else if (sourceType == ZipEntry::kCompressDeflated) {
463        /* we should support uncompressed-from-compressed, but it's not
464         * important right now */
465        assert(compressionMethod == ZipEntry::kCompressDeflated);
466
467        bool scanResult;
468        int method;
469        long compressedLen;
470        unsigned long longcrc;
471
472        scanResult = ZipUtils::examineGzip(inputFp, &method, &uncompressedLen,
473                        &compressedLen, &longcrc);
474        if (!scanResult || method != ZipEntry::kCompressDeflated) {
475            ALOGD("this isn't a deflated gzip file?");
476            result = UNKNOWN_ERROR;
477            goto bail;
478        }
479        crc = longcrc;
480
481        result = copyPartialFpToFp(mZipFp, inputFp, compressedLen, NULL);
482        if (result != NO_ERROR) {
483            ALOGD("failed copying gzip data in\n");
484            goto bail;
485        }
486    } else {
487        assert(false);
488        result = UNKNOWN_ERROR;
489        goto bail;
490    }
491
492    /*
493     * We could write the "Data Descriptor", but there doesn't seem to
494     * be any point since we're going to go back and write the LFH.
495     *
496     * Update file offsets.
497     */
498    endPosn = ftell(mZipFp);            // seeked to end of compressed data
499
500    /*
501     * Success!  Fill out new values.
502     */
503    pEntry->setDataInfo(uncompressedLen, endPosn - startPosn, crc,
504        compressionMethod);
505    modWhen = getModTime(inputFp ? fileno(inputFp) : fileno(mZipFp));
506    pEntry->setModWhen(modWhen);
507    pEntry->setLFHOffset(lfhPosn);
508    mEOCD.mNumEntries++;
509    mEOCD.mTotalNumEntries++;
510    mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
511    mEOCD.mCentralDirOffset = endPosn;
512
513    /*
514     * Go back and write the LFH.
515     */
516    if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
517        result = UNKNOWN_ERROR;
518        goto bail;
519    }
520    pEntry->mLFH.write(mZipFp);
521
522    /*
523     * Add pEntry to the list.
524     */
525    mEntries.add(pEntry);
526    if (ppEntry != NULL)
527        *ppEntry = pEntry;
528    pEntry = NULL;
529
530bail:
531    if (inputFp != NULL)
532        fclose(inputFp);
533    delete pEntry;
534    return result;
535}
536
537/*
538 * Add an entry by copying it from another zip file.  If "padding" is
539 * nonzero, the specified number of bytes will be added to the "extra"
540 * field in the header.
541 *
542 * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
543 */
544status_t ZipFile::add(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
545    int padding, ZipEntry** ppEntry)
546{
547    ZipEntry* pEntry = NULL;
548    status_t result;
549    long lfhPosn, endPosn;
550
551    if (mReadOnly)
552        return INVALID_OPERATION;
553
554    /* make sure we're in a reasonable state */
555    assert(mZipFp != NULL);
556    assert(mEntries.size() == mEOCD.mTotalNumEntries);
557
558    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
559        result = UNKNOWN_ERROR;
560        goto bail;
561    }
562
563    pEntry = new ZipEntry;
564    if (pEntry == NULL) {
565        result = NO_MEMORY;
566        goto bail;
567    }
568
569    result = pEntry->initFromExternal(pSourceEntry);
570    if (result != NO_ERROR)
571        goto bail;
572    if (padding != 0) {
573        result = pEntry->addPadding(padding);
574        if (result != NO_ERROR)
575            goto bail;
576    }
577
578    /*
579     * From here on out, failures are more interesting.
580     */
581    mNeedCDRewrite = true;
582
583    /*
584     * Write the LFH.  Since we're not recompressing the data, we already
585     * have all of the fields filled out.
586     */
587    lfhPosn = ftell(mZipFp);
588    pEntry->mLFH.write(mZipFp);
589
590    /*
591     * Copy the data over.
592     *
593     * If the "has data descriptor" flag is set, we want to copy the DD
594     * fields as well.  This is a fixed-size area immediately following
595     * the data.
596     */
597    if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
598    {
599        result = UNKNOWN_ERROR;
600        goto bail;
601    }
602
603    off_t copyLen;
604    copyLen = pSourceEntry->getCompressedLen();
605    if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
606        copyLen += ZipEntry::kDataDescriptorLen;
607
608    if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
609        != NO_ERROR)
610    {
611        ALOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
612        result = UNKNOWN_ERROR;
613        goto bail;
614    }
615
616    /*
617     * Update file offsets.
618     */
619    endPosn = ftell(mZipFp);
620
621    /*
622     * Success!  Fill out new values.
623     */
624    pEntry->setLFHOffset(lfhPosn);      // sets mCDE.mLocalHeaderRelOffset
625    mEOCD.mNumEntries++;
626    mEOCD.mTotalNumEntries++;
627    mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
628    mEOCD.mCentralDirOffset = endPosn;
629
630    /*
631     * Add pEntry to the list.
632     */
633    mEntries.add(pEntry);
634    if (ppEntry != NULL)
635        *ppEntry = pEntry;
636    pEntry = NULL;
637
638    result = NO_ERROR;
639
640bail:
641    delete pEntry;
642    return result;
643}
644
645/*
646 * Add an entry by copying it from another zip file, recompressing with
647 * Zopfli if already compressed.
648 *
649 * If "ppEntry" is non-NULL, a pointer to the new entry will be returned.
650 */
651status_t ZipFile::addRecompress(const ZipFile* pSourceZip, const ZipEntry* pSourceEntry,
652    ZipEntry** ppEntry)
653{
654    ZipEntry* pEntry = NULL;
655    status_t result;
656    long lfhPosn, startPosn, endPosn, uncompressedLen;
657
658    if (mReadOnly)
659        return INVALID_OPERATION;
660
661    /* make sure we're in a reasonable state */
662    assert(mZipFp != NULL);
663    assert(mEntries.size() == mEOCD.mTotalNumEntries);
664
665    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
666        result = UNKNOWN_ERROR;
667        goto bail;
668    }
669
670    pEntry = new ZipEntry;
671    if (pEntry == NULL) {
672        result = NO_MEMORY;
673        goto bail;
674    }
675
676    result = pEntry->initFromExternal(pSourceEntry);
677    if (result != NO_ERROR)
678        goto bail;
679
680    /*
681     * From here on out, failures are more interesting.
682     */
683    mNeedCDRewrite = true;
684
685    /*
686     * Write the LFH, even though it's still mostly blank.  We need it
687     * as a place-holder.  In theory the LFH isn't necessary, but in
688     * practice some utilities demand it.
689     */
690    lfhPosn = ftell(mZipFp);
691    pEntry->mLFH.write(mZipFp);
692    startPosn = ftell(mZipFp);
693
694    /*
695     * Copy the data over.
696     *
697     * If the "has data descriptor" flag is set, we want to copy the DD
698     * fields as well.  This is a fixed-size area immediately following
699     * the data.
700     */
701    if (fseek(pSourceZip->mZipFp, pSourceEntry->getFileOffset(), SEEK_SET) != 0)
702    {
703        result = UNKNOWN_ERROR;
704        goto bail;
705    }
706
707    uncompressedLen = pSourceEntry->getUncompressedLen();
708
709    if (pSourceEntry->isCompressed()) {
710        void *buf = pSourceZip->uncompress(pSourceEntry);
711        if (buf == NULL) {
712            result = NO_MEMORY;
713            goto bail;
714        }
715        long startPosn = ftell(mZipFp);
716        uint32_t crc;
717        if (compressFpToFp(mZipFp, NULL, buf, uncompressedLen, &crc) != NO_ERROR) {
718            ALOGW("recompress of '%s' failed\n", pEntry->mCDE.mFileName);
719            result = UNKNOWN_ERROR;
720            free(buf);
721            goto bail;
722        }
723        long endPosn = ftell(mZipFp);
724        pEntry->setDataInfo(uncompressedLen, endPosn - startPosn,
725            pSourceEntry->getCRC32(), ZipEntry::kCompressDeflated);
726        free(buf);
727    } else {
728        off_t copyLen;
729        copyLen = pSourceEntry->getCompressedLen();
730        if ((pSourceEntry->mLFH.mGPBitFlag & ZipEntry::kUsesDataDescr) != 0)
731            copyLen += ZipEntry::kDataDescriptorLen;
732
733        if (copyPartialFpToFp(mZipFp, pSourceZip->mZipFp, copyLen, NULL)
734            != NO_ERROR)
735        {
736            ALOGW("copy of '%s' failed\n", pEntry->mCDE.mFileName);
737            result = UNKNOWN_ERROR;
738            goto bail;
739        }
740    }
741
742    /*
743     * Update file offsets.
744     */
745    endPosn = ftell(mZipFp);
746
747    /*
748     * Success!  Fill out new values.
749     */
750    pEntry->setLFHOffset(lfhPosn);
751    mEOCD.mNumEntries++;
752    mEOCD.mTotalNumEntries++;
753    mEOCD.mCentralDirSize = 0;      // mark invalid; set by flush()
754    mEOCD.mCentralDirOffset = endPosn;
755
756    /*
757     * Go back and write the LFH.
758     */
759    if (fseek(mZipFp, lfhPosn, SEEK_SET) != 0) {
760        result = UNKNOWN_ERROR;
761        goto bail;
762    }
763    pEntry->mLFH.write(mZipFp);
764
765    /*
766     * Add pEntry to the list.
767     */
768    mEntries.add(pEntry);
769    if (ppEntry != NULL)
770        *ppEntry = pEntry;
771    pEntry = NULL;
772
773    result = NO_ERROR;
774
775bail:
776    delete pEntry;
777    return result;
778}
779
780/*
781 * Copy all of the bytes in "src" to "dst".
782 *
783 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
784 * will be seeked immediately past the data.
785 */
786status_t ZipFile::copyFpToFp(FILE* dstFp, FILE* srcFp, uint32_t* pCRC32)
787{
788    uint8_t tmpBuf[32768];
789    size_t count;
790
791    *pCRC32 = crc32(0L, Z_NULL, 0);
792
793    while (1) {
794        count = fread(tmpBuf, 1, sizeof(tmpBuf), srcFp);
795        if (ferror(srcFp) || ferror(dstFp))
796            return errnoToStatus(errno);
797        if (count == 0)
798            break;
799
800        *pCRC32 = crc32(*pCRC32, tmpBuf, count);
801
802        if (fwrite(tmpBuf, 1, count, dstFp) != count) {
803            ALOGD("fwrite %d bytes failed\n", (int) count);
804            return UNKNOWN_ERROR;
805        }
806    }
807
808    return NO_ERROR;
809}
810
811/*
812 * Copy all of the bytes in "src" to "dst".
813 *
814 * On exit, "dstFp" will be seeked immediately past the data.
815 */
816status_t ZipFile::copyDataToFp(FILE* dstFp,
817    const void* data, size_t size, uint32_t* pCRC32)
818{
819    size_t count;
820
821    *pCRC32 = crc32(0L, Z_NULL, 0);
822    if (size > 0) {
823        *pCRC32 = crc32(*pCRC32, (const unsigned char*)data, size);
824        if (fwrite(data, 1, size, dstFp) != size) {
825            ALOGD("fwrite %d bytes failed\n", (int) size);
826            return UNKNOWN_ERROR;
827        }
828    }
829
830    return NO_ERROR;
831}
832
833/*
834 * Copy some of the bytes in "src" to "dst".
835 *
836 * If "pCRC32" is NULL, the CRC will not be computed.
837 *
838 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
839 * will be seeked immediately past the data just written.
840 */
841status_t ZipFile::copyPartialFpToFp(FILE* dstFp, FILE* srcFp, long length,
842    uint32_t* pCRC32)
843{
844    uint8_t tmpBuf[32768];
845    size_t count;
846
847    if (pCRC32 != NULL)
848        *pCRC32 = crc32(0L, Z_NULL, 0);
849
850    while (length) {
851        long readSize;
852
853        readSize = sizeof(tmpBuf);
854        if (readSize > length)
855            readSize = length;
856
857        count = fread(tmpBuf, 1, readSize, srcFp);
858        if ((long) count != readSize) {     // error or unexpected EOF
859            ALOGD("fread %d bytes failed\n", (int) readSize);
860            return UNKNOWN_ERROR;
861        }
862
863        if (pCRC32 != NULL)
864            *pCRC32 = crc32(*pCRC32, tmpBuf, count);
865
866        if (fwrite(tmpBuf, 1, count, dstFp) != count) {
867            ALOGD("fwrite %d bytes failed\n", (int) count);
868            return UNKNOWN_ERROR;
869        }
870
871        length -= readSize;
872    }
873
874    return NO_ERROR;
875}
876
877/*
878 * Compress all of the data in "srcFp" and write it to "dstFp".
879 *
880 * On exit, "srcFp" will be seeked to the end of the file, and "dstFp"
881 * will be seeked immediately past the compressed data.
882 */
883status_t ZipFile::compressFpToFp(FILE* dstFp, FILE* srcFp,
884    const void* data, size_t size, uint32_t* pCRC32)
885{
886    status_t result = NO_ERROR;
887    const size_t kBufSize = 1024 * 1024;
888    uint8_t* inBuf = NULL;
889    uint8_t* outBuf = NULL;
890    size_t outSize = 0;
891    bool atEof = false;     // no feof() aviailable yet
892    uint32_t crc;
893    ZopfliOptions options;
894    unsigned char bp = 0;
895
896    ZopfliInitOptions(&options);
897
898    crc = crc32(0L, Z_NULL, 0);
899
900    if (data) {
901        crc = crc32(crc, (const unsigned char*)data, size);
902        ZopfliDeflate(&options, 2, true, (const unsigned char*)data, size, &bp,
903            &outBuf, &outSize);
904    } else {
905        /*
906         * Create an input buffer and an output buffer.
907         */
908        inBuf = new uint8_t[kBufSize];
909        if (inBuf == NULL) {
910            result = NO_MEMORY;
911            goto bail;
912        }
913
914        /*
915         * Loop while we have data.
916         */
917        do {
918            size_t getSize;
919            getSize = fread(inBuf, 1, kBufSize, srcFp);
920            if (ferror(srcFp)) {
921                ALOGD("deflate read failed (errno=%d)\n", errno);
922                delete[] inBuf;
923                goto bail;
924            }
925            if (getSize < kBufSize) {
926                ALOGV("+++  got %d bytes, EOF reached\n",
927                    (int)getSize);
928                atEof = true;
929            }
930
931            crc = crc32(crc, inBuf, getSize);
932            ZopfliDeflate(&options, 2, atEof, inBuf, getSize, &bp, &outBuf, &outSize);
933        } while (!atEof);
934        delete[] inBuf;
935    }
936
937    ALOGV("+++ writing %d bytes\n", (int)outSize);
938    if (fwrite(outBuf, 1, outSize, dstFp) != outSize) {
939        ALOGD("write %d failed in deflate\n", (int)outSize);
940        goto bail;
941    }
942
943    *pCRC32 = crc;
944
945bail:
946    free(outBuf);
947
948    return result;
949}
950
951/*
952 * Mark an entry as deleted.
953 *
954 * We will eventually need to crunch the file down, but if several files
955 * are being removed (perhaps as part of an "update" process) we can make
956 * things considerably faster by deferring the removal to "flush" time.
957 */
958status_t ZipFile::remove(ZipEntry* pEntry)
959{
960    /*
961     * Should verify that pEntry is actually part of this archive, and
962     * not some stray ZipEntry from a different file.
963     */
964
965    /* mark entry as deleted, and mark archive as dirty */
966    pEntry->setDeleted();
967    mNeedCDRewrite = true;
968    return NO_ERROR;
969}
970
971/*
972 * Flush any pending writes.
973 *
974 * In particular, this will crunch out deleted entries, and write the
975 * Central Directory and EOCD if we have stomped on them.
976 */
977status_t ZipFile::flush(void)
978{
979    status_t result = NO_ERROR;
980    long eocdPosn;
981    int i, count;
982
983    if (mReadOnly)
984        return INVALID_OPERATION;
985    if (!mNeedCDRewrite)
986        return NO_ERROR;
987
988    assert(mZipFp != NULL);
989
990    result = crunchArchive();
991    if (result != NO_ERROR)
992        return result;
993
994    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0)
995        return UNKNOWN_ERROR;
996
997    count = mEntries.size();
998    for (i = 0; i < count; i++) {
999        ZipEntry* pEntry = mEntries[i];
1000        pEntry->mCDE.write(mZipFp);
1001    }
1002
1003    eocdPosn = ftell(mZipFp);
1004    mEOCD.mCentralDirSize = eocdPosn - mEOCD.mCentralDirOffset;
1005
1006    mEOCD.write(mZipFp);
1007
1008    /*
1009     * If we had some stuff bloat up during compression and get replaced
1010     * with plain files, or if we deleted some entries, there's a lot
1011     * of wasted space at the end of the file.  Remove it now.
1012     */
1013    if (ftruncate(fileno(mZipFp), ftell(mZipFp)) != 0) {
1014        ALOGW("ftruncate failed %ld: %s\n", ftell(mZipFp), strerror(errno));
1015        // not fatal
1016    }
1017
1018    /* should we clear the "newly added" flag in all entries now? */
1019
1020    mNeedCDRewrite = false;
1021    return NO_ERROR;
1022}
1023
1024/*
1025 * Crunch deleted files out of an archive by shifting the later files down.
1026 *
1027 * Because we're not using a temp file, we do the operation inside the
1028 * current file.
1029 */
1030status_t ZipFile::crunchArchive(void)
1031{
1032    status_t result = NO_ERROR;
1033    int i, count;
1034    long delCount, adjust;
1035
1036#if 0
1037    printf("CONTENTS:\n");
1038    for (i = 0; i < (int) mEntries.size(); i++) {
1039        printf(" %d: lfhOff=%ld del=%d\n",
1040            i, mEntries[i]->getLFHOffset(), mEntries[i]->getDeleted());
1041    }
1042    printf("  END is %ld\n", (long) mEOCD.mCentralDirOffset);
1043#endif
1044
1045    /*
1046     * Roll through the set of files, shifting them as appropriate.  We
1047     * could probably get a slight performance improvement by sliding
1048     * multiple files down at once (because we could use larger reads
1049     * when operating on batches of small files), but it's not that useful.
1050     */
1051    count = mEntries.size();
1052    delCount = adjust = 0;
1053    for (i = 0; i < count; i++) {
1054        ZipEntry* pEntry = mEntries[i];
1055        long span;
1056
1057        if (pEntry->getLFHOffset() != 0) {
1058            long nextOffset;
1059
1060            /* Get the length of this entry by finding the offset
1061             * of the next entry.  Directory entries don't have
1062             * file offsets, so we need to find the next non-directory
1063             * entry.
1064             */
1065            nextOffset = 0;
1066            for (int ii = i+1; nextOffset == 0 && ii < count; ii++)
1067                nextOffset = mEntries[ii]->getLFHOffset();
1068            if (nextOffset == 0)
1069                nextOffset = mEOCD.mCentralDirOffset;
1070            span = nextOffset - pEntry->getLFHOffset();
1071
1072            assert(span >= ZipEntry::LocalFileHeader::kLFHLen);
1073        } else {
1074            /* This is a directory entry.  It doesn't have
1075             * any actual file contents, so there's no need to
1076             * move anything.
1077             */
1078            span = 0;
1079        }
1080
1081        //printf("+++ %d: off=%ld span=%ld del=%d [count=%d]\n",
1082        //    i, pEntry->getLFHOffset(), span, pEntry->getDeleted(), count);
1083
1084        if (pEntry->getDeleted()) {
1085            adjust += span;
1086            delCount++;
1087
1088            delete pEntry;
1089            mEntries.removeAt(i);
1090
1091            /* adjust loop control */
1092            count--;
1093            i--;
1094        } else if (span != 0 && adjust > 0) {
1095            /* shuffle this entry back */
1096            //printf("+++ Shuffling '%s' back %ld\n",
1097            //    pEntry->getFileName(), adjust);
1098            result = filemove(mZipFp, pEntry->getLFHOffset() - adjust,
1099                        pEntry->getLFHOffset(), span);
1100            if (result != NO_ERROR) {
1101                /* this is why you use a temp file */
1102                ALOGE("error during crunch - archive is toast\n");
1103                return result;
1104            }
1105
1106            pEntry->setLFHOffset(pEntry->getLFHOffset() - adjust);
1107        }
1108    }
1109
1110    /*
1111     * Fix EOCD info.  We have to wait until the end to do some of this
1112     * because we use mCentralDirOffset to determine "span" for the
1113     * last entry.
1114     */
1115    mEOCD.mCentralDirOffset -= adjust;
1116    mEOCD.mNumEntries -= delCount;
1117    mEOCD.mTotalNumEntries -= delCount;
1118    mEOCD.mCentralDirSize = 0;  // mark invalid; set by flush()
1119
1120    assert(mEOCD.mNumEntries == mEOCD.mTotalNumEntries);
1121    assert(mEOCD.mNumEntries == count);
1122
1123    return result;
1124}
1125
1126/*
1127 * Works like memmove(), but on pieces of a file.
1128 */
1129status_t ZipFile::filemove(FILE* fp, off_t dst, off_t src, size_t n)
1130{
1131    if (dst == src || n <= 0)
1132        return NO_ERROR;
1133
1134    uint8_t readBuf[32768];
1135
1136    if (dst < src) {
1137        /* shift stuff toward start of file; must read from start */
1138        while (n != 0) {
1139            size_t getSize = sizeof(readBuf);
1140            if (getSize > n)
1141                getSize = n;
1142
1143            if (fseek(fp, (long) src, SEEK_SET) != 0) {
1144                ALOGD("filemove src seek %ld failed\n", (long) src);
1145                return UNKNOWN_ERROR;
1146            }
1147
1148            if (fread(readBuf, 1, getSize, fp) != getSize) {
1149                ALOGD("filemove read %ld off=%ld failed\n",
1150                    (long) getSize, (long) src);
1151                return UNKNOWN_ERROR;
1152            }
1153
1154            if (fseek(fp, (long) dst, SEEK_SET) != 0) {
1155                ALOGD("filemove dst seek %ld failed\n", (long) dst);
1156                return UNKNOWN_ERROR;
1157            }
1158
1159            if (fwrite(readBuf, 1, getSize, fp) != getSize) {
1160                ALOGD("filemove write %ld off=%ld failed\n",
1161                    (long) getSize, (long) dst);
1162                return UNKNOWN_ERROR;
1163            }
1164
1165            src += getSize;
1166            dst += getSize;
1167            n -= getSize;
1168        }
1169    } else {
1170        /* shift stuff toward end of file; must read from end */
1171        assert(false);      // write this someday, maybe
1172        return UNKNOWN_ERROR;
1173    }
1174
1175    return NO_ERROR;
1176}
1177
1178
1179/*
1180 * Get the modification time from a file descriptor.
1181 */
1182time_t ZipFile::getModTime(int fd)
1183{
1184    struct stat sb;
1185
1186    if (fstat(fd, &sb) < 0) {
1187        ALOGD("HEY: fstat on fd %d failed\n", fd);
1188        return (time_t) -1;
1189    }
1190
1191    return sb.st_mtime;
1192}
1193
1194
#if 0       /* this is a bad idea */
/*
 * Hand out a duplicate of the Zip file descriptor.
 *
 * Refused (INVALID_OPERATION) unless the archive was opened read-only:
 * between calls to flush() a read-write archive's contents are in an
 * uncertain state, so the duplicate should only ever be used for reads.
 *
 * Returns the new descriptor, or the negative value from dup() on failure.
 */
int ZipFile::getZipFd(void) const
{
    if (!mReadOnly)
        return INVALID_OPERATION;
    assert(mZipFp != NULL);

    const int dupFd = dup(fileno(mZipFp));
    if (dupFd < 0) {
        ALOGD("didn't work, errno=%d\n", errno);
    }

    return dupFd;
}
#endif
1218
1219
#if 0
/*
 * Expand an entry's data into a caller-supplied buffer.
 *
 * Disabled placeholder: nothing is expanded and false is always returned.
 */
bool ZipFile::uncompress(const ZipEntry* pEntry, void* buf) const
{
    return false;
}
#endif
1229
1230// free the memory when you're done
1231void* ZipFile::uncompress(const ZipEntry* entry) const
1232{
1233    size_t unlen = entry->getUncompressedLen();
1234    size_t clen = entry->getCompressedLen();
1235
1236    void* buf = malloc(unlen);
1237    if (buf == NULL) {
1238        return NULL;
1239    }
1240
1241    fseek(mZipFp, 0, SEEK_SET);
1242
1243    off_t offset = entry->getFileOffset();
1244    if (fseek(mZipFp, offset, SEEK_SET) != 0) {
1245        goto bail;
1246    }
1247
1248    switch (entry->getCompressionMethod())
1249    {
1250        case ZipEntry::kCompressStored: {
1251            ssize_t amt = fread(buf, 1, unlen, mZipFp);
1252            if (amt != (ssize_t)unlen) {
1253                goto bail;
1254            }
1255#if 0
1256            printf("data...\n");
1257            const unsigned char* p = (unsigned char*)buf;
1258            const unsigned char* end = p+unlen;
1259            for (int i=0; i<32 && p < end; i++) {
1260                printf("0x%08x ", (int)(offset+(i*0x10)));
1261                for (int j=0; j<0x10 && p < end; j++) {
1262                    printf(" %02x", *p);
1263                    p++;
1264                }
1265                printf("\n");
1266            }
1267#endif
1268
1269            }
1270            break;
1271        case ZipEntry::kCompressDeflated: {
1272            if (!ZipUtils::inflateToBuffer(mZipFp, buf, unlen, clen)) {
1273                goto bail;
1274            }
1275            }
1276            break;
1277        default:
1278            goto bail;
1279    }
1280    return buf;
1281
1282bail:
1283    free(buf);
1284    return NULL;
1285}
1286
1287
1288/*
1289 * ===========================================================================
1290 *      ZipFile::EndOfCentralDir
1291 * ===========================================================================
1292 */
1293
1294/*
1295 * Read the end-of-central-dir fields.
1296 *
1297 * "buf" should be positioned at the EOCD signature, and should contain
1298 * the entire EOCD area including the comment.
1299 */
1300status_t ZipFile::EndOfCentralDir::readBuf(const uint8_t* buf, int len)
1301{
1302    /* don't allow re-use */
1303    assert(mComment == NULL);
1304
1305    if (len < kEOCDLen) {
1306        /* looks like ZIP file got truncated */
1307        ALOGD(" Zip EOCD: expected >= %d bytes, found %d\n",
1308            kEOCDLen, len);
1309        return INVALID_OPERATION;
1310    }
1311
1312    /* this should probably be an assert() */
1313    if (ZipEntry::getLongLE(&buf[0x00]) != kSignature)
1314        return UNKNOWN_ERROR;
1315
1316    mDiskNumber = ZipEntry::getShortLE(&buf[0x04]);
1317    mDiskWithCentralDir = ZipEntry::getShortLE(&buf[0x06]);
1318    mNumEntries = ZipEntry::getShortLE(&buf[0x08]);
1319    mTotalNumEntries = ZipEntry::getShortLE(&buf[0x0a]);
1320    mCentralDirSize = ZipEntry::getLongLE(&buf[0x0c]);
1321    mCentralDirOffset = ZipEntry::getLongLE(&buf[0x10]);
1322    mCommentLen = ZipEntry::getShortLE(&buf[0x14]);
1323
1324    // TODO: validate mCentralDirOffset
1325
1326    if (mCommentLen > 0) {
1327        if (kEOCDLen + mCommentLen > len) {
1328            ALOGD("EOCD(%d) + comment(%" PRIu16 ") exceeds len (%d)\n",
1329                kEOCDLen, mCommentLen, len);
1330            return UNKNOWN_ERROR;
1331        }
1332        mComment = new uint8_t[mCommentLen];
1333        memcpy(mComment, buf + kEOCDLen, mCommentLen);
1334    }
1335
1336    return NO_ERROR;
1337}
1338
1339/*
1340 * Write an end-of-central-directory section.
1341 */
1342status_t ZipFile::EndOfCentralDir::write(FILE* fp)
1343{
1344    uint8_t buf[kEOCDLen];
1345
1346    ZipEntry::putLongLE(&buf[0x00], kSignature);
1347    ZipEntry::putShortLE(&buf[0x04], mDiskNumber);
1348    ZipEntry::putShortLE(&buf[0x06], mDiskWithCentralDir);
1349    ZipEntry::putShortLE(&buf[0x08], mNumEntries);
1350    ZipEntry::putShortLE(&buf[0x0a], mTotalNumEntries);
1351    ZipEntry::putLongLE(&buf[0x0c], mCentralDirSize);
1352    ZipEntry::putLongLE(&buf[0x10], mCentralDirOffset);
1353    ZipEntry::putShortLE(&buf[0x14], mCommentLen);
1354
1355    if (fwrite(buf, 1, kEOCDLen, fp) != kEOCDLen)
1356        return UNKNOWN_ERROR;
1357    if (mCommentLen > 0) {
1358        assert(mComment != NULL);
1359        if (fwrite(mComment, mCommentLen, 1, fp) != mCommentLen)
1360            return UNKNOWN_ERROR;
1361    }
1362
1363    return NO_ERROR;
1364}
1365
1366/*
1367 * Dump the contents of an EndOfCentralDir object.
1368 */
1369void ZipFile::EndOfCentralDir::dump(void) const
1370{
1371    ALOGD(" EndOfCentralDir contents:\n");
1372    ALOGD("  diskNum=%" PRIu16 " diskWCD=%" PRIu16 " numEnt=%" PRIu16 " totalNumEnt=%" PRIu16 "\n",
1373        mDiskNumber, mDiskWithCentralDir, mNumEntries, mTotalNumEntries);
1374    ALOGD("  centDirSize=%" PRIu32 " centDirOff=%" PRIu32 " commentLen=%" PRIu32 "\n",
1375        mCentralDirSize, mCentralDirOffset, mCommentLen);
1376}
1377
1378