zip_archive.cc revision eaf988532b9e603b1599b7750bfa923fbb39d297
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *    http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20#include "ziparchive/zip_archive.h"
21
22#include <zlib.h>
23
24#include <assert.h>
25#include <errno.h>
26#include <limits.h>
27#include <log/log.h>
28#include <fcntl.h>
29#include <stdlib.h>
30#include <string.h>
31#include <unistd.h>
32#include <utils/FileMap.h>
33
34#include <JNIHelp.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
35
36// This is for windows. If we don't open a file in binary mode, weirds
37// things will happen.
38#ifndef O_BINARY
39#define O_BINARY 0
40#endif
41
/*
 * Zip file constants.
 *
 * The k*Signature values are record magic numbers, the k*Len values are
 * sizes in bytes, and the remaining values are byte offsets of a field
 * from the start of its record.
 */
static const uint32_t kEOCDSignature    = 0x06054b50;
// Size of the fixed part of the End Of Central Directory record: the 4 byte
// signature plus 18 bytes of fields. It has to cover every EOCD field offset
// below, because the EOCD scan uses it to decide how close to the end of
// the file a record may start.
static const uint32_t kEOCDLen          = 22;
static const uint32_t kEOCDNumEntries   = 8;              // offset to #of entries in file
static const uint32_t kEOCDSize         = 12;             // size of the central directory
static const uint32_t kEOCDFileOffset   = 16;             // offset to central directory

static const uint32_t kMaxCommentLen    = 65535;          // longest possible in ushort
static const uint32_t kMaxEOCDSearch    = (kMaxCommentLen + kEOCDLen);

static const uint32_t kLFHSignature     = 0x04034b50;
static const uint32_t kLFHLen           = 30;             // excluding variable-len fields
static const uint32_t kLFHGPBFlags      = 6;              // general purpose bit flags
static const uint32_t kLFHCRC           = 14;             // offset to CRC
static const uint32_t kLFHCompLen       = 18;             // offset to compressed length
static const uint32_t kLFHUncompLen     = 22;             // offset to uncompressed length
static const uint32_t kLFHNameLen       = 26;             // offset to filename length
static const uint32_t kLFHExtraLen      = 28;             // offset to extra length

static const uint32_t kCDESignature     = 0x02014b50;
static const uint32_t kCDELen           = 46;             // excluding variable-len fields
static const uint32_t kCDEMethod        = 10;             // offset to compression method
static const uint32_t kCDEModWhen       = 12;             // offset to modification timestamp
static const uint32_t kCDECRC           = 16;             // offset to entry CRC
static const uint32_t kCDECompLen       = 20;             // offset to compressed length
static const uint32_t kCDEUncompLen     = 24;             // offset to uncompressed length
static const uint32_t kCDENameLen       = 28;             // offset to filename length
static const uint32_t kCDEExtraLen      = 30;             // offset to extra length
static const uint32_t kCDECommentLen    = 32;             // offset to comment length
static const uint32_t kCDELocalOffset   = 42;             // offset to local hdr

static const uint32_t kDDOptSignature   = 0x08074b50;     // *OPTIONAL* data descriptor signature
static const uint32_t kDDSignatureLen   = 4;
static const uint32_t kDDLen            = 12;
static const uint32_t kDDMaxLen         = 16;             // max of 16 bytes with a signature, 12 bytes without
static const uint32_t kDDCrc32          = 0;              // offset to crc32
static const uint32_t kDDCompLen        = 4;              // offset to compressed length
static const uint32_t kDDUncompLen      = 8;              // offset to uncompressed length

static const uint32_t kGPBDDFlagMask    = 0x0008;         // mask value that signifies that the entry has a DD

static const uint32_t kMaxErrorLen = 1024;
86
// Human readable descriptions of the error codes defined below. The entries
// appear in the same order as the negated codes (index 1 goes with
// kIterationEnd == -1, index 12 with kMmapFailed == -12); index 0 holds the
// text for unrecognized codes.
// NOTE(review): the lookup code is not part of this excerpt; presumably the
// table is indexed with -error_code -- confirm before reordering anything.
static const char* kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};

// Exclusive upper bound of the error code range: every code below is
// negative.
static const int32_t kErrorMessageUpperBound = 0;

// Returned by Next() once every hash table slot has been visited.
static const int32_t kIterationEnd = -1;

// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// We were not able to mmap the central directory or entry contents.
static const int32_t kMmapFailed = -12;

// Exclusive lower bound of the error code range: one below the smallest
// code defined above (kMmapFailed).
static const int32_t kErrorMessageLowerBound = -13;

// NOTE(review): not referenced in this excerpt; judging by the text it is
// presumably the debug name given to the mapping used when extracting an
// entry to a file -- confirm against its users.
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
150
151/*
152 * A Read-only Zip archive.
153 *
154 * We want "open" and "find entry by name" to be fast operations, and
155 * we want to use as little memory as possible.  We memory-map the zip
156 * central directory, and load a hash table with pointers to the filenames
157 * (which aren't null-terminated).  The other fields are at a fixed offset
158 * from the filename, so we don't need to extract those (but we do need
159 * to byte-read and endian-swap them every time we want them).
160 *
161 * It's possible that somebody has handed us a massive (~1GB) zip archive,
162 * so we can't expect to mmap the entire file.
163 *
164 * To speed comparisons when doing a lookup by name, we could make the mapping
165 * "private" (copy-on-write) and null-terminate the filenames after verifying
166 * the record structure.  However, this requires a private mapping of
167 * every page that the Central Directory touches.  Easier to tuck a copy
168 * of the string length into the hash table entry.
169 */
struct ZipArchive {
  /* open Zip archive; closed by CloseArchive when it is >= 0 */
  int fd;

  /*
   * mapped central directory area: |directory_offset| is the file offset
   * at which the central directory starts, |directory_map| is the mapping
   * of its bytes
   */
  off64_t directory_offset;
  android::FileMap* directory_map;

  /* number of entries in the Zip archive, as declared by the EOCD record */
  uint16_t num_entries;

  /*
   * We know how many entries are in the Zip archive, so we can have a
   * fixed-size hash table. We define a load factor of 0.75 and overallocate
   * so the maximum number entries can never be higher than
   * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
   *
   * The size is rounded up to a power of 2 so that an index can be derived
   * with a bit mask. The name pointers stored in the table point into
   * |directory_map|; the names are not copied.
   */
  uint32_t hash_table_size;
  ZipEntryName* hash_table;
};
190
191// Returns 0 on success and negative values on failure.
192static android::FileMap* MapFileSegment(const int fd, const off64_t start,
193                                        const size_t length, const bool read_only,
194                                        const char* debug_file_name) {
195  android::FileMap* file_map = new android::FileMap;
196  const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
197  if (!success) {
198    file_map->release();
199    return NULL;
200  }
201
202  return file_map;
203}
204
205static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
206  static const uint32_t kBufSize = 32768;
207  uint8_t buf[kBufSize];
208
209  uint32_t count = 0;
210  uint64_t crc = 0;
211  while (count < length) {
212    uint32_t remaining = length - count;
213
214    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
215    // value.
216    ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
217    ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
218
219    if (actual != get_size) {
220      ALOGW("CopyFileToFile: copy read failed (%d vs %zd)",
221          (int) actual, get_size);
222      return kIoError;
223    }
224
225    memcpy(begin + count, buf, get_size);
226    crc = crc32(crc, buf, get_size);
227    count += get_size;
228  }
229
230  *crc_out = crc;
231
232  return 0;
233}
234
/*
 * Returns the smallest power of 2 that is greater than or equal to |val|.
 * The result wraps around to 0 when |val| is 0 or larger than 2^31.
 *
 * Bit-smearing technique found on
 * http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;

  // Copy the highest set bit into every lower position; the filled region
  // doubles in width on each pass.
  for (unsigned int shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
251
/*
 * Hashes the |len| bytes at |str| by repeatedly multiplying the running
 * value by 31 and adding the next character. An empty input hashes to 0.
 * The input does not need to be null-terminated.
 */
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (const char* end = str + len; str != end; ++str) {
    hash = hash * 31 + *str;
  }

  return hash;
}
261
262/*
263 * Convert a ZipEntry to a hash table index, verifying that it's in a
264 * valid range.
265 */
266static int64_t EntryToIndex(const ZipEntryName* hash_table,
267                            const uint32_t hash_table_size,
268                            const char* name, uint16_t length) {
269  const uint32_t hash = ComputeHash(name, length);
270
271  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
272  uint32_t ent = hash & (hash_table_size - 1);
273  while (hash_table[ent].name != NULL) {
274    if (hash_table[ent].name_length == length &&
275        memcmp(hash_table[ent].name, name, length) == 0) {
276      return ent;
277    }
278
279    ent = (ent + 1) & (hash_table_size - 1);
280  }
281
282  ALOGV("Zip: Unable to find entry %.*s", name_length, name);
283  return kEntryNotFound;
284}
285
286/*
287 * Add a new entry to the hash table.
288 */
289static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
290                         const char* name, uint16_t length) {
291  const uint64_t hash = ComputeHash(name, length);
292  uint32_t ent = hash & (hash_table_size - 1);
293
294  /*
295   * We over-allocated the table, so we're guaranteed to find an empty slot.
296   * Further, we guarantee that the hashtable size is not 0.
297   */
298  while (hash_table[ent].name != NULL) {
299    if (hash_table[ent].name_length == length &&
300        memcmp(hash_table[ent].name, name, length) == 0) {
301      // We've found a duplicate entry. We don't accept it
302      ALOGW("Zip: Found duplicate entry %.*s", length, name);
303      return kDuplicateEntry;
304    }
305    ent = (ent + 1) & (hash_table_size - 1);
306  }
307
308  hash_table[ent].name = name;
309  hash_table[ent].name_length = length;
310  return 0;
311}
312
/*
 * Reads a 16-bit little-endian value from the two bytes at |src|.
 */
static uint16_t get2LE(const uint8_t* src) {
  const unsigned int low = src[0];
  const unsigned int high = src[1];
  return (uint16_t) ((high << 8) | low);
}
319
/*
 * Reads a 32-bit little-endian value from the four bytes at |src|.
 *
 * Every byte is widened to uint32_t before it is shifted. Shifting the
 * promoted (signed) int instead would move a bit into the sign position
 * whenever src[3] >= 0x80.
 */
static uint32_t get4LE(const uint8_t* src) {
  return (uint32_t) src[0] |
         ((uint32_t) src[1] << 8) |
         ((uint32_t) src[2] << 16) |
         ((uint32_t) src[3] << 24);
}
333
334static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
335                                    ZipArchive* archive, off64_t file_length,
336                                    uint32_t read_amount, uint8_t* scan_buffer) {
337  const off64_t search_start = file_length - read_amount;
338
339  if (lseek64(fd, search_start, SEEK_SET) != search_start) {
340    ALOGW("Zip: seek %lld failed: %s", search_start, strerror(errno));
341    return kIoError;
342  }
343  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
344  if (actual != (ssize_t) read_amount) {
345    ALOGW("Zip: read %zd failed: %s", read_amount, strerror(errno));
346    return kIoError;
347  }
348
349  /*
350   * Scan backward for the EOCD magic.  In an archive without a trailing
351   * comment, we'll find it on the first try.  (We may want to consider
352   * doing an initial minimal read; if we don't find it, retry with a
353   * second read as above.)
354   */
355  int i;
356  for (i = read_amount - kEOCDLen; i >= 0; i--) {
357    if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
358      ALOGV("+++ Found EOCD at buf+%d", i);
359      break;
360    }
361  }
362  if (i < 0) {
363    ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
364    return kInvalidFile;
365  }
366
367  const off64_t eocd_offset = search_start + i;
368  const uint8_t* eocd_ptr = scan_buffer + i;
369
370  assert(eocd_offset < file_length);
371
372  /*
373   * Grab the CD offset and size, and the number of entries in the
374   * archive.  Verify that they look reasonable. Widen dir_size and
375   * dir_offset to the file offset type.
376   */
377  const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
378  const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
379  const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
380
381  if (dir_offset + dir_size > eocd_offset) {
382    ALOGW("Zip: bad offsets (dir %lld, size %lld, eocd %lld)",
383        dir_offset, dir_size, eocd_offset);
384    return kInvalidOffset;
385  }
386  if (num_entries == 0) {
387    ALOGW("Zip: empty archive?");
388    return kEmptyArchive;
389  }
390
391  ALOGV("+++ num_entries=%d dir_size=%d dir_offset=%d", num_entries, dir_size,
392      dir_offset);
393
394  /*
395   * It all looks good.  Create a mapping for the CD, and set the fields
396   * in archive.
397   */
398  android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size,
399                                         true /* read only */, debug_file_name);
400  if (map == NULL) {
401    archive->directory_map = NULL;
402    return kMmapFailed;
403  }
404
405  archive->directory_map = map;
406  archive->num_entries = num_entries;
407  archive->directory_offset = dir_offset;
408
409  return 0;
410}
411
412/*
413 * Find the zip Central Directory and memory-map it.
414 *
415 * On success, returns 0 after populating fields from the EOCD area:
416 *   directory_offset
417 *   directory_map
418 *   num_entries
419 */
420static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
421                                   ZipArchive* archive) {
422
423  // Test file length. We use lseek64 to make sure the file
424  // is small enough to be a zip file (Its size must be less than
425  // 0xffffffff bytes).
426  off64_t file_length = lseek64(fd, 0, SEEK_END);
427  if (file_length == -1) {
428    ALOGV("Zip: lseek on fd %d failed", fd);
429    return kInvalidFile;
430  }
431
432  if (file_length > (off64_t) 0xffffffff) {
433    ALOGV("Zip: zip file too long %d", file_length);
434    return kInvalidFile;
435  }
436
437  if (file_length < (int64_t) kEOCDLen) {
438    ALOGV("Zip: length %ld is too small to be zip", file_length);
439    return kInvalidFile;
440  }
441
442  /*
443   * Perform the traditional EOCD snipe hunt.
444   *
445   * We're searching for the End of Central Directory magic number,
446   * which appears at the start of the EOCD block.  It's followed by
447   * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
448   * need to read the last part of the file into a buffer, dig through
449   * it to find the magic number, parse some values out, and use those
450   * to determine the extent of the CD.
451   *
452   * We start by pulling in the last part of the file.
453   */
454  uint32_t read_amount = kMaxEOCDSearch;
455  if (file_length < (off64_t) read_amount) {
456    read_amount = file_length;
457  }
458
459  uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
460  int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
461                                        file_length, read_amount, scan_buffer);
462
463  free(scan_buffer);
464  return result;
465}
466
467/*
468 * Parses the Zip archive's Central Directory.  Allocates and populates the
469 * hash table.
470 *
471 * Returns 0 on success.
472 */
473static int32_t ParseZipArchive(ZipArchive* archive) {
474  int32_t result = -1;
475  const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
476  size_t cd_length = archive->directory_map->getDataLength();
477  uint16_t num_entries = archive->num_entries;
478
479  /*
480   * Create hash table.  We have a minimum 75% load factor, possibly as
481   * low as 50% after we round off to a power of 2.  There must be at
482   * least one unused entry to avoid an infinite loop during creation.
483   */
484  archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
485  archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
486      sizeof(ZipEntryName));
487
488  /*
489   * Walk through the central directory, adding entries to the hash
490   * table and verifying values.
491   */
492  const uint8_t* ptr = cd_ptr;
493  for (uint16_t i = 0; i < num_entries; i++) {
494    if (get4LE(ptr) != kCDESignature) {
495      ALOGW("Zip: missed a central dir sig (at %d)", i);
496      goto bail;
497    }
498
499    if (ptr + kCDELen > cd_ptr + cd_length) {
500      ALOGW("Zip: ran off the end (at %d)", i);
501      goto bail;
502    }
503
504    const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
505    if (local_header_offset >= archive->directory_offset) {
506      ALOGW("Zip: bad LFH offset %lld at entry %d", local_header_offset, i);
507      goto bail;
508    }
509
510    const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
511    const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
512    const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
513
514    /* add the CDE filename to the hash table */
515    const int add_result = AddToHash(archive->hash_table,
516        archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
517    if (add_result) {
518      ALOGW("Zip: Error adding entry to hash table %d", add_result);
519      result = add_result;
520      goto bail;
521    }
522
523    ptr += kCDELen + file_name_length + extra_length + comment_length;
524    if ((size_t)(ptr - cd_ptr) > cd_length) {
525      ALOGW("Zip: bad CD advance (%d vs %zd) at entry %d",
526        (int) (ptr - cd_ptr), cd_length, i);
527      goto bail;
528    }
529  }
530  ALOGV("+++ zip good scan %d entries", num_entries);
531
532  result = 0;
533
534bail:
535  return result;
536}
537
538static int32_t OpenArchiveInternal(ZipArchive* archive,
539                                   const char* debug_file_name) {
540  int32_t result = -1;
541  if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
542    return result;
543  }
544
545  if ((result = ParseZipArchive(archive))) {
546    return result;
547  }
548
549  return 0;
550}
551
552int32_t OpenArchiveFd(int fd, const char* debug_file_name,
553                      ZipArchiveHandle* handle) {
554  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
555  memset(archive, 0, sizeof(*archive));
556  *handle = archive;
557
558  archive->fd = fd;
559
560  return OpenArchiveInternal(archive, debug_file_name);
561}
562
563int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
564  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
565  memset(archive, 0, sizeof(*archive));
566  *handle = archive;
567
568  const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
569  if (fd < 0) {
570    ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
571    return kIoError;
572  } else {
573    archive->fd = fd;
574  }
575
576  return OpenArchiveInternal(archive, fileName);
577}
578
579/*
580 * Close a ZipArchive, closing the file and freeing the contents.
581 */
582void CloseArchive(ZipArchiveHandle handle) {
583  ZipArchive* archive = (ZipArchive*) handle;
584  ALOGV("Closing archive %p", archive);
585
586  if (archive->fd >= 0) {
587    close(archive->fd);
588  }
589
590  if (archive->directory_map != NULL) {
591    archive->directory_map->release();
592  }
593  free(archive->hash_table);
594
595  /* ensure nobody tries to use the ZipArchive after it's closed */
596  archive->directory_offset = -1;
597  archive->fd = -1;
598  archive->num_entries = -1;
599  archive->hash_table_size = -1;
600  archive->hash_table = NULL;
601}
602
603static int32_t UpdateEntryFromDataDescriptor(int fd,
604                                             ZipEntry *entry) {
605  uint8_t ddBuf[kDDMaxLen];
606  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
607  if (actual != sizeof(ddBuf)) {
608    return kIoError;
609  }
610
611  const uint32_t ddSignature = get4LE(ddBuf);
612  uint16_t ddOffset = 0;
613  if (ddSignature == kDDOptSignature) {
614    ddOffset = 4;
615  }
616
617  entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
618  entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
619  entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
620
621  return 0;
622}
623
624// Attempts to read |len| bytes into |buf| at offset |off|.
625//
626// This method uses pread64 on platforms that support it and
627// lseek64 + read on platforms that don't. This implies that
628// callers should not rely on the |fd| offset being incremented
629// as a side effect of this call.
630static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
631                                   off64_t off) {
632#ifdef HAVE_PREAD
633  return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
634#else
635  // The only supported platform that doesn't support pread at the moment
636  // is Windows. Only recent versions of windows support unix like forks,
637  // and even there the semantics are quite different.
638  if (lseek64(fd, off, SEEK_SET) != off) {
639    ALOGW("Zip: failed seek to offset %lld", off);
640    return kIoError;
641  }
642
643  return TEMP_FAILURE_RETRY(read(fd, buf, len));
644#endif  // HAVE_PREAD
645}
646
647static int32_t FindEntry(const ZipArchive* archive, const int ent,
648                         ZipEntry* data) {
649  const uint16_t nameLen = archive->hash_table[ent].name_length;
650  const char* name = archive->hash_table[ent].name;
651
652  // Recover the start of the central directory entry from the filename
653  // pointer.  The filename is the first entry past the fixed-size data,
654  // so we can just subtract back from that.
655  const unsigned char* ptr = (const unsigned char*) name;
656  ptr -= kCDELen;
657
658  // This is the base of our mmapped region, we have to sanity check that
659  // the name that's in the hash table is a pointer to a location within
660  // this mapped region.
661  const unsigned char* base_ptr = (const unsigned char*)
662    archive->directory_map->getDataPtr();
663  if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
664    ALOGW("Zip: Invalid entry pointer");
665    return kInvalidOffset;
666  }
667
668  // The offset of the start of the central directory in the zipfile.
669  // We keep this lying around so that we can sanity check all our lengths
670  // and our per-file structures.
671  const off64_t cd_offset = archive->directory_offset;
672
673  // Fill out the compression method, modification time, crc32
674  // and other interesting attributes from the central directory. These
675  // will later be compared against values from the local file header.
676  data->method = get2LE(ptr + kCDEMethod);
677  data->mod_time = get4LE(ptr + kCDEModWhen);
678  data->crc32 = get4LE(ptr + kCDECRC);
679  data->compressed_length = get4LE(ptr + kCDECompLen);
680  data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
681
682  // Figure out the local header offset from the central directory. The
683  // actual file data will begin after the local header and the name /
684  // extra comments.
685  const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
686  if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
687    ALOGW("Zip: bad local hdr offset in zip");
688    return kInvalidOffset;
689  }
690
691  uint8_t lfh_buf[kLFHLen];
692  ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
693                                 local_header_offset);
694  if (actual != sizeof(lfh_buf)) {
695    ALOGW("Zip: failed reading lfh name from offset %lld", local_header_offset);
696    return kIoError;
697  }
698
699  if (get4LE(lfh_buf) != kLFHSignature) {
700    ALOGW("Zip: didn't find signature at start of lfh, offset=%lld",
701        local_header_offset);
702    return kInvalidOffset;
703  }
704
705  // Paranoia: Match the values specified in the local file header
706  // to those specified in the central directory.
707  const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
708  const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
709  const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
710
711  if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
712    const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
713    const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
714    const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
715
716    data->has_data_descriptor = 0;
717    if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
718        || data->crc32 != lfhCrc) {
719      ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}",
720        data->compressed_length, data->uncompressed_length, data->crc32,
721        lfhCompLen, lfhUncompLen, lfhCrc);
722      return kInconsistentInformation;
723    }
724  } else {
725    data->has_data_descriptor = 1;
726  }
727
728  // Check that the local file header name matches the declared
729  // name in the central directory.
730  if (lfhNameLen == nameLen) {
731    const off64_t name_offset = local_header_offset + kLFHLen;
732    if (name_offset + lfhNameLen >= cd_offset) {
733      ALOGW("Zip: Invalid declared length");
734      return kInvalidOffset;
735    }
736
737    uint8_t* name_buf = (uint8_t*) malloc(nameLen);
738    ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
739                                  name_offset);
740
741    if (actual != nameLen) {
742      ALOGW("Zip: failed reading lfh name from offset %lld", name_offset);
743      free(name_buf);
744      return kIoError;
745    }
746
747    if (memcmp(name, name_buf, nameLen)) {
748      free(name_buf);
749      return kInconsistentInformation;
750    }
751
752    free(name_buf);
753  } else {
754    ALOGW("Zip: lfh name did not match central directory.");
755    return kInconsistentInformation;
756  }
757
758  const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
759  if (data_offset >= cd_offset) {
760    ALOGW("Zip: bad data offset %lld in zip", (off64_t) data_offset);
761    return kInvalidOffset;
762  }
763
764  if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
765    ALOGW("Zip: bad compressed length in zip (%lld + %zd > %lld)",
766      data_offset, data->compressed_length, cd_offset);
767    return kInvalidOffset;
768  }
769
770  if (data->method == kCompressStored &&
771    (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
772     ALOGW("Zip: bad uncompressed length in zip (%lld + %zd > %lld)",
773       data_offset, data->uncompressed_length, cd_offset);
774     return kInvalidOffset;
775  }
776
777  data->offset = data_offset;
778  return 0;
779}
780
// State of one iteration over the entries of an archive. Allocated by
// StartIteration and advanced by Next.
struct IterationHandle {
  // Index of the first hash table slot that the next call to Next examines.
  uint32_t position;
  // Optional name prefix to filter on; NULL means that every entry matches.
  // Only the pointer is stored, the string is not copied.
  const char* prefix;
  // Length of |prefix| in bytes; only meaningful when |prefix| is non-NULL.
  uint16_t prefix_len;
  // The archive whose hash table is being walked.
  ZipArchive* archive;
};
787
788int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
789  ZipArchive* archive = (ZipArchive *) handle;
790
791  if (archive == NULL || archive->hash_table == NULL) {
792    ALOGW("Zip: Invalid ZipArchiveHandle");
793    return kInvalidHandle;
794  }
795
796  IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
797  cookie->position = 0;
798  cookie->prefix = prefix;
799  cookie->archive = archive;
800  if (prefix != NULL) {
801    cookie->prefix_len = strlen(prefix);
802  }
803
804  *cookie_ptr = cookie ;
805  return 0;
806}
807
808int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
809                  ZipEntry* data) {
810  const ZipArchive* archive = (ZipArchive*) handle;
811  const int nameLen = strlen(entryName);
812  if (nameLen == 0 || nameLen > 65535) {
813    ALOGW("Zip: Invalid filename %s", entryName);
814    return kInvalidEntryName;
815  }
816
817  const int64_t ent = EntryToIndex(archive->hash_table,
818    archive->hash_table_size, entryName, nameLen);
819
820  if (ent < 0) {
821    ALOGW("Zip: Could not find entry %.*s", nameLen, entryName);
822    return ent;
823  }
824
825  return FindEntry(archive, ent, data);
826}
827
828int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
829  IterationHandle* handle = (IterationHandle *) cookie;
830  if (handle == NULL) {
831    return kInvalidHandle;
832  }
833
834  ZipArchive* archive = handle->archive;
835  if (archive == NULL || archive->hash_table == NULL) {
836    ALOGW("Zip: Invalid ZipArchiveHandle");
837    return kInvalidHandle;
838  }
839
840  const uint32_t currentOffset = handle->position;
841  const uint32_t hash_table_length = archive->hash_table_size;
842  const ZipEntryName *hash_table = archive->hash_table;
843
844  for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
845    if (hash_table[i].name != NULL &&
846        (handle->prefix == NULL ||
847         (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
848      handle->position = (i + 1);
849      const int error = FindEntry(archive, i, data);
850      if (!error) {
851        name->name = hash_table[i].name;
852        name->name_length = hash_table[i].name_length;
853      }
854
855      return error;
856    }
857  }
858
859  handle->position = 0;
860  return kIterationEnd;
861}
862
863static int32_t InflateToFile(int fd, const ZipEntry* entry,
864                             uint8_t* begin, uint32_t length,
865                             uint64_t* crc_out) {
866  int32_t result = -1;
867  const uint32_t kBufSize = 32768;
868  uint8_t read_buf[kBufSize];
869  uint8_t write_buf[kBufSize];
870  z_stream zstream;
871  int zerr;
872
873  /*
874   * Initialize the zlib stream struct.
875   */
876  memset(&zstream, 0, sizeof(zstream));
877  zstream.zalloc = Z_NULL;
878  zstream.zfree = Z_NULL;
879  zstream.opaque = Z_NULL;
880  zstream.next_in = NULL;
881  zstream.avail_in = 0;
882  zstream.next_out = (Bytef*) write_buf;
883  zstream.avail_out = kBufSize;
884  zstream.data_type = Z_UNKNOWN;
885
886  /*
887   * Use the undocumented "negative window bits" feature to tell zlib
888   * that there's no zlib header waiting for it.
889   */
890  zerr = inflateInit2(&zstream, -MAX_WBITS);
891  if (zerr != Z_OK) {
892    if (zerr == Z_VERSION_ERROR) {
893      ALOGE("Installed zlib is not compatible with linked version (%s)",
894        ZLIB_VERSION);
895    } else {
896      ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
897    }
898
899    return kZlibError;
900  }
901
902  const uint32_t uncompressed_length = entry->uncompressed_length;
903
904  uint32_t compressed_length = entry->compressed_length;
905  uint32_t write_count = 0;
906  do {
907    /* read as much as we can */
908    if (zstream.avail_in == 0) {
909      const ssize_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
910      const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
911      if (actual != getSize) {
912        ALOGW("Zip: inflate read failed (%d vs %zd)", actual, getSize);
913        result = kIoError;
914        goto z_bail;
915      }
916
917      compressed_length -= getSize;
918
919      zstream.next_in = read_buf;
920      zstream.avail_in = getSize;
921    }
922
923    /* uncompress the data */
924    zerr = inflate(&zstream, Z_NO_FLUSH);
925    if (zerr != Z_OK && zerr != Z_STREAM_END) {
926      ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
927          zerr, zstream.next_in, zstream.avail_in,
928          zstream.next_out, zstream.avail_out);
929      result = kZlibError;
930      goto z_bail;
931    }
932
933    /* write when we're full or when we're done */
934    if (zstream.avail_out == 0 ||
935      (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
936      const size_t write_size = zstream.next_out - write_buf;
937      // The file might have declared a bogus length.
938      if (write_size + write_count > length) {
939        goto z_bail;
940      }
941      memcpy(begin + write_count, write_buf, write_size);
942      write_count += write_size;
943
944      zstream.next_out = write_buf;
945      zstream.avail_out = kBufSize;
946    }
947  } while (zerr == Z_OK);
948
949  assert(zerr == Z_STREAM_END);     /* other errors should've been caught */
950
951  // stream.adler holds the crc32 value for such streams.
952  *crc_out = zstream.adler;
953
954  if (zstream.total_out != uncompressed_length || compressed_length != 0) {
955    ALOGW("Zip: size mismatch on inflated file (%ld vs %zd)",
956        zstream.total_out, uncompressed_length);
957    result = kInconsistentInformation;
958    goto z_bail;
959  }
960
961  result = 0;
962
963z_bail:
964  inflateEnd(&zstream);    /* free up any allocated structures */
965
966  return result;
967}
968
969int32_t ExtractToMemory(ZipArchiveHandle handle,
970                        ZipEntry* entry, uint8_t* begin, uint32_t size) {
971  ZipArchive* archive = (ZipArchive*) handle;
972  const uint16_t method = entry->method;
973  off64_t data_offset = entry->offset;
974
975  if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
976    ALOGW("Zip: lseek to data at %lld failed", (off64_t) data_offset);
977    return kIoError;
978  }
979
980  // this should default to kUnknownCompressionMethod.
981  int32_t return_value = -1;
982  uint64_t crc = 0;
983  if (method == kCompressStored) {
984    return_value = CopyFileToFile(archive->fd, begin, size, &crc);
985  } else if (method == kCompressDeflated) {
986    return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
987  }
988
989  if (!return_value && entry->has_data_descriptor) {
990    return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
991    if (return_value) {
992      return return_value;
993    }
994  }
995
996  // TODO: Fix this check by passing the right flags to inflate2 so that
997  // it calculates the CRC for us.
998  if (entry->crc32 != crc && false) {
999    ALOGW("Zip: crc mismatch: expected %u, was %llu", entry->crc32, crc);
1000    return kInconsistentInformation;
1001  }
1002
1003  return return_value;
1004}
1005
1006int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1007                           ZipEntry* entry, int fd) {
1008  const int32_t declared_length = entry->uncompressed_length;
1009
1010  int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length));
1011  if (result == -1) {
1012    ALOGW("Zip: unable to truncate file to %ud", declared_length);
1013    return kIoError;
1014  }
1015
1016  android::FileMap* map  = MapFileSegment(fd, 0, declared_length,
1017                                          false, kTempMappingFileName);
1018  if (map == NULL) {
1019    return kMmapFailed;
1020  }
1021
1022  const int32_t error = ExtractToMemory(handle, entry,
1023                                        reinterpret_cast<uint8_t*>(map->getDataPtr()),
1024                                        map->getDataLength());
1025  map->release();
1026  return error;
1027}
1028
1029const char* ErrorCodeString(int32_t error_code) {
1030  if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1031    return kErrorMessages[error_code * -1];
1032  }
1033
1034  return kErrorMessages[0];
1035}
1036
1037int GetFileDescriptor(const ZipArchiveHandle handle) {
1038  return ((ZipArchive*) handle)->fd;
1039}
1040
1041