ZipFile.cpp revision 41bc424c323f86806f04acd22304d4d149bc5dbe
1/*
2 * Copyright (C) 2006 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17//
18// Access to Zip archives.
19//
20
21#include "ZipFile.h"
22
23#include <memory.h>
24#include <sys/stat.h>
25#include <errno.h>
26#include <assert.h>
27#include <inttypes.h>
28
29using namespace android;
30
31#define LOG(...) fprintf(stderr, __VA_ARGS__)
32
33/*
34 * Open a file and rewrite the headers
35 */
36status_t ZipFile::rewrite(const char* zipFileName)
37{
38    assert(mZipFp == NULL);     // no reopen
39
40    /* open the file */
41    mZipFp = fopen(zipFileName, "r+b");
42    if (mZipFp == NULL) {
43        int err = errno;
44        LOG("fopen failed: %d\n", err);
45        return -1;
46    }
47
48    /*
49     * Load the central directory.  If that fails, then this probably
50     * isn't a Zip archive.
51     */
52    return rewriteCentralDir();
53}
54
55/*
56 * Find the central directory, read and rewrite the contents.
57 *
58 * The fun thing about ZIP archives is that they may or may not be
59 * readable from start to end.  In some cases, notably for archives
60 * that were written to stdout, the only length information is in the
61 * central directory at the end of the file.
62 *
63 * Of course, the central directory can be followed by a variable-length
64 * comment field, so we have to scan through it backwards.  The comment
65 * is at most 64K, plus we have 18 bytes for the end-of-central-dir stuff
66 * itself, plus apparently sometimes people throw random junk on the end
67 * just for the fun of it.
68 *
69 * This is all a little wobbly.  If the wrong value ends up in the EOCD
70 * area, we're hosed.  This appears to be the way that everbody handles
71 * it though, so we're in pretty good company if this fails.
72 */
73status_t ZipFile::rewriteCentralDir(void)
74{
75    status_t result = 0;
76    uint8_t* buf = NULL;
77    off_t fileLength, seekStart;
78    long readAmount;
79    int i;
80
81    fseek(mZipFp, 0, SEEK_END);
82    fileLength = ftell(mZipFp);
83    rewind(mZipFp);
84
85    /* too small to be a ZIP archive? */
86    if (fileLength < EndOfCentralDir::kEOCDLen) {
87        LOG("Length is %ld -- too small\n", (long)fileLength);
88        result = -1;
89        goto bail;
90    }
91
92    buf = new uint8_t[EndOfCentralDir::kMaxEOCDSearch];
93    if (buf == NULL) {
94        LOG("Failure allocating %d bytes for EOCD search",
95             EndOfCentralDir::kMaxEOCDSearch);
96        result = -1;
97        goto bail;
98    }
99
100    if (fileLength > EndOfCentralDir::kMaxEOCDSearch) {
101        seekStart = fileLength - EndOfCentralDir::kMaxEOCDSearch;
102        readAmount = EndOfCentralDir::kMaxEOCDSearch;
103    } else {
104        seekStart = 0;
105        readAmount = (long) fileLength;
106    }
107    if (fseek(mZipFp, seekStart, SEEK_SET) != 0) {
108        LOG("Failure seeking to end of zip at %ld", (long) seekStart);
109        result = -1;
110        goto bail;
111    }
112
113    /* read the last part of the file into the buffer */
114    if (fread(buf, 1, readAmount, mZipFp) != (size_t) readAmount) {
115        LOG("short file? wanted %ld\n", readAmount);
116        result = -1;
117        goto bail;
118    }
119
120    /* find the end-of-central-dir magic */
121    for (i = readAmount - 4; i >= 0; i--) {
122        if (buf[i] == 0x50 &&
123            ZipEntry::getLongLE(&buf[i]) == EndOfCentralDir::kSignature)
124        {
125            break;
126        }
127    }
128    if (i < 0) {
129        LOG("EOCD not found, not Zip\n");
130        result = -1;
131        goto bail;
132    }
133
134    /* extract eocd values */
135    result = mEOCD.readBuf(buf + i, readAmount - i);
136    if (result != 0) {
137        LOG("Failure reading %ld bytes of EOCD values", readAmount - i);
138        goto bail;
139    }
140
141    /*
142     * So far so good.  "mCentralDirSize" is the size in bytes of the
143     * central directory, so we can just seek back that far to find it.
144     * We can also seek forward mCentralDirOffset bytes from the
145     * start of the file.
146     *
147     * We're not guaranteed to have the rest of the central dir in the
148     * buffer, nor are we guaranteed that the central dir will have any
149     * sort of convenient size.  We need to skip to the start of it and
150     * read the header, then the other goodies.
151     *
152     * The only thing we really need right now is the file comment, which
153     * we're hoping to preserve.
154     */
155    if (fseek(mZipFp, mEOCD.mCentralDirOffset, SEEK_SET) != 0) {
156        LOG("Failure seeking to central dir offset %" PRIu32 "\n",
157             mEOCD.mCentralDirOffset);
158        result = -1;
159        goto bail;
160    }
161
162    /*
163     * Loop through and read the central dir entries.
164     */
165    int entry;
166    for (entry = 0; entry < mEOCD.mTotalNumEntries; entry++) {
167        ZipEntry* pEntry = new ZipEntry;
168
169        result = pEntry->initAndRewriteFromCDE(mZipFp);
170        if (result != 0) {
171            LOG("initFromCDE failed\n");
172            delete pEntry;
173            goto bail;
174        }
175
176        delete pEntry;
177    }
178
179
180    /*
181     * If all went well, we should now be back at the EOCD.
182     */
183    uint8_t checkBuf[4];
184    if (fread(checkBuf, 1, 4, mZipFp) != 4) {
185        LOG("EOCD check read failed\n");
186        result = -1;
187        goto bail;
188    }
189    if (ZipEntry::getLongLE(checkBuf) != EndOfCentralDir::kSignature) {
190        LOG("EOCD read check failed\n");
191        result = -1;
192        goto bail;
193    }
194
195bail:
196    delete[] buf;
197    return result;
198}
199
200/*
201 * ===========================================================================
202 *      ZipFile::EndOfCentralDir
203 * ===========================================================================
204 */
205
206/*
207 * Read the end-of-central-dir fields.
208 *
209 * "buf" should be positioned at the EOCD signature, and should contain
210 * the entire EOCD area including the comment.
211 */
212status_t ZipFile::EndOfCentralDir::readBuf(const uint8_t* buf, int len)
213{
214    uint16_t diskNumber, diskWithCentralDir, numEntries;
215
216    if (len < kEOCDLen) {
217        /* looks like ZIP file got truncated */
218        LOG(" Zip EOCD: expected >= %d bytes, found %d\n",
219            kEOCDLen, len);
220        return -1;
221    }
222
223    /* this should probably be an assert() */
224    if (ZipEntry::getLongLE(&buf[0x00]) != kSignature)
225        return -1;
226
227    diskNumber = ZipEntry::getShortLE(&buf[0x04]);
228    diskWithCentralDir = ZipEntry::getShortLE(&buf[0x06]);
229    numEntries = ZipEntry::getShortLE(&buf[0x08]);
230    mTotalNumEntries = ZipEntry::getShortLE(&buf[0x0a]);
231    mCentralDirOffset = ZipEntry::getLongLE(&buf[0x10]);
232
233    if (diskNumber != 0 || diskWithCentralDir != 0 ||
234        numEntries != mTotalNumEntries)
235    {
236        LOG("Archive spanning not supported\n");
237        return -1;
238    }
239
240    return 0;
241}
242