zip_archive.cc revision 56a90a08dbbbf96ef415dc6bc84bff2a409efc68
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *    http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20
21#include <assert.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <limits.h>
26#include <log/log.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30#include <utils/Compat.h>
31#include <utils/FileMap.h>
32#include <zlib.h>
33
34#include <JNIHelp.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
35
36#include "ziparchive/zip_archive.h"
37
// Needed for Windows: if a file is not opened in binary mode, weird things
// will happen. Platforms without the flag get a no-op definition so it can
// always be OR-ed into the open() flags.
#if !defined(O_BINARY)
#define O_BINARY 0
#endif
43
44/*
45 * Zip file constants.
46 */
// End Of Central Directory (EOCD) record.
static const uint32_t kEOCDSignature    = 0x06054b50;
// Size of the fixed part of the EOCD record: the 4-byte signature plus 18
// bytes of fields. The code reads fields up to kEOCDFileOffset + 4 from the
// start of the record, so this must never be smaller than that.
static const uint32_t kEOCDLen          = 22;
static const uint32_t kEOCDNumEntries   = 8;              // offset to #of entries in file
static const uint32_t kEOCDSize         = 12;             // size of the central directory
static const uint32_t kEOCDFileOffset   = 16;             // offset to central directory

static const uint32_t kMaxCommentLen    = 65535;          // longest possible in ushort
// The EOCD can be followed by a comment of at most kMaxCommentLen bytes, so
// this many trailing bytes of the file always contain the whole record.
static const uint32_t kMaxEOCDSearch    = (kMaxCommentLen + kEOCDLen);
55
// Local File Header (LFH): signature, size of the fixed part, and byte
// offsets of the fields read from it.
static const uint32_t kLFHSignature = 0x04034b50;
static const uint32_t kLFHLen = 30;         // variable-length fields not included
static const uint32_t kLFHGPBFlags = 6;     // general purpose bit flags
static const uint32_t kLFHCRC = 14;         // CRC
static const uint32_t kLFHCompLen = 18;     // compressed length
static const uint32_t kLFHUncompLen = 22;   // uncompressed length
static const uint32_t kLFHNameLen = 26;     // filename length
static const uint32_t kLFHExtraLen = 28;    // extra field length

// Central Directory Entry (CDE): signature, size of the fixed part, and byte
// offsets of the fields read from it.
static const uint32_t kCDESignature = 0x02014b50;
static const uint32_t kCDELen = 46;         // variable-length fields not included
static const uint32_t kCDEMethod = 10;      // compression method
static const uint32_t kCDEModWhen = 12;     // modification timestamp
static const uint32_t kCDECRC = 16;         // entry CRC
static const uint32_t kCDECompLen = 20;     // compressed length
static const uint32_t kCDEUncompLen = 24;   // uncompressed length
static const uint32_t kCDENameLen = 28;     // filename length
static const uint32_t kCDEExtraLen = 30;    // extra field length
static const uint32_t kCDECommentLen = 32;  // comment length
static const uint32_t kCDELocalOffset = 42; // offset of the local header

// Data Descriptor (DD). Its leading signature is *OPTIONAL*: the record is
// 16 bytes with the signature and 12 bytes without it. Field offsets are
// relative to the first byte after the signature (if any).
static const uint32_t kDDOptSignature = 0x08074b50;
static const uint32_t kDDSignatureLen = 4;
static const uint32_t kDDLen = 12;
static const uint32_t kDDMaxLen = 16;
static const uint32_t kDDCrc32 = 0;         // crc32
static const uint32_t kDDCompLen = 4;       // compressed length
static const uint32_t kDDUncompLen = 8;     // uncompressed length

// General purpose flag bit that marks an entry as having a data descriptor.
static const uint32_t kGPBDDFlagMask = 0x0008;

static const uint32_t kMaxErrorLen = 1024;
88
// Human readable descriptions of the return codes. The entry at index i
// describes the error code whose value is -i; index 0 is the fallback text.
// NOTE(review): presumably indexed with the negated return code - the lookup
// itself is not in this part of the file.
static const char* kErrorMessages[] = {
  "Unknown return code.",           //   0
  "Iteration ended",                //  -1
  "Zlib error",                     //  -2
  "Invalid file",                   //  -3
  "Invalid handle",                 //  -4
  "Duplicate entries in archive",   //  -5
  "Empty archive",                  //  -6
  "Entry not found",                //  -7
  "Invalid offset",                 //  -8
  "Inconsistent information",       //  -9
  "Invalid entry name",             // -10
  "I/O Error",                      // -11
  "File mapping failed"             // -12
};
104
// Return codes. Every real error code lies strictly between
// kErrorMessageLowerBound and kErrorMessageUpperBound.
static const int32_t kErrorMessageUpperBound = 0;

static const int32_t kIterationEnd = -1;

// Inflating a stream from this file produced a Zlib error; this usually
// indicates file corruption.
static const int32_t kZlibError = -2;

// The input cannot be processed as a zip archive, usually because it is too
// small, too large or lacks a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// Two (or possibly more) entries in the archive share the same name.
static const int32_t kDuplicateEntry = -5;

// The archive has no entries.
static const int32_t kEmptyArchive = -6;

// The requested entry does not exist in the archive.
static const int32_t kEntryNotFound = -7;

// The archive contains an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The archive's entry information is inconsistent: either the central
// directory and the local file header disagree, or the actual uncompressed
// length or crc32 does not match the declared value.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// The central directory or the entry contents could not be mmapped.
static const int32_t kMmapFailed = -12;

static const int32_t kErrorMessageLowerBound = -13;

static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
152
153/*
154 * A Read-only Zip archive.
155 *
156 * We want "open" and "find entry by name" to be fast operations, and
157 * we want to use as little memory as possible.  We memory-map the zip
158 * central directory, and load a hash table with pointers to the filenames
159 * (which aren't null-terminated).  The other fields are at a fixed offset
160 * from the filename, so we don't need to extract those (but we do need
161 * to byte-read and endian-swap them every time we want them).
162 *
163 * It's possible that somebody has handed us a massive (~1GB) zip archive,
164 * so we can't expect to mmap the entire file.
165 *
166 * To speed comparisons when doing a lookup by name, we could make the mapping
167 * "private" (copy-on-write) and null-terminate the filenames after verifying
168 * the record structure.  However, this requires a private mapping of
169 * every page that the Central Directory touches.  Easier to tuck a copy
170 * of the string length into the hash table entry.
171 */
172struct ZipArchive {
173  /* open Zip archive */
174  int fd;
175
176  /* mapped central directory area */
177  off64_t directory_offset;
178  android::FileMap* directory_map;
179
180  /* number of entries in the Zip archive */
181  uint16_t num_entries;
182
183  /*
184   * We know how many entries are in the Zip archive, so we can have a
185   * fixed-size hash table. We define a load factor of 0.75 and overallocat
186   * so the maximum number entries can never be higher than
187   * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
188   */
189  uint32_t hash_table_size;
190  ZipEntryName* hash_table;
191};
192
193// Returns 0 on success and negative values on failure.
194static android::FileMap* MapFileSegment(const int fd, const off64_t start,
195                                        const size_t length, const bool read_only,
196                                        const char* debug_file_name) {
197  android::FileMap* file_map = new android::FileMap;
198  const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
199  if (!success) {
200    file_map->release();
201    return NULL;
202  }
203
204  return file_map;
205}
206
207static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
208  static const uint32_t kBufSize = 32768;
209  uint8_t buf[kBufSize];
210
211  uint32_t count = 0;
212  uint64_t crc = 0;
213  while (count < length) {
214    uint32_t remaining = length - count;
215
216    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
217    // value.
218    ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
219    ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
220
221    if (actual != get_size) {
222      ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
223      return kIoError;
224    }
225
226    memcpy(begin + count, buf, get_size);
227    crc = crc32(crc, buf, get_size);
228    count += get_size;
229  }
230
231  *crc_out = crc;
232
233  return 0;
234}
235
/*
 * Returns the smallest power of 2 that is >= |val|. An input of 0 (or one
 * above 2^31) wraps around and yields 0.
 *
 * Technique from http://graphics.stanford.edu/~seander/bithacks.html:
 * subtract one, copy the highest set bit into every lower position, then
 * add one.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  for (unsigned int shift = 1; shift < 32; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
252
// Hashes the |len| bytes starting at |str| (no terminating NUL required):
// the running value is multiplied by 31 and the next character is added,
// with 32-bit wrap-around. An empty input hashes to 0.
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (uint16_t i = 0; i < len; ++i) {
    hash = hash * 31 + str[i];
  }

  return hash;
}
262
263/*
264 * Convert a ZipEntry to a hash table index, verifying that it's in a
265 * valid range.
266 */
267static int64_t EntryToIndex(const ZipEntryName* hash_table,
268                            const uint32_t hash_table_size,
269                            const char* name, uint16_t length) {
270  const uint32_t hash = ComputeHash(name, length);
271
272  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
273  uint32_t ent = hash & (hash_table_size - 1);
274  while (hash_table[ent].name != NULL) {
275    if (hash_table[ent].name_length == length &&
276        memcmp(hash_table[ent].name, name, length) == 0) {
277      return ent;
278    }
279
280    ent = (ent + 1) & (hash_table_size - 1);
281  }
282
283  ALOGV("Zip: Unable to find entry %.*s", length, name);
284  return kEntryNotFound;
285}
286
287/*
288 * Add a new entry to the hash table.
289 */
290static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
291                         const char* name, uint16_t length) {
292  const uint64_t hash = ComputeHash(name, length);
293  uint32_t ent = hash & (hash_table_size - 1);
294
295  /*
296   * We over-allocated the table, so we're guaranteed to find an empty slot.
297   * Further, we guarantee that the hashtable size is not 0.
298   */
299  while (hash_table[ent].name != NULL) {
300    if (hash_table[ent].name_length == length &&
301        memcmp(hash_table[ent].name, name, length) == 0) {
302      // We've found a duplicate entry. We don't accept it
303      ALOGW("Zip: Found duplicate entry %.*s", length, name);
304      return kDuplicateEntry;
305    }
306    ent = (ent + 1) & (hash_table_size - 1);
307  }
308
309  hash_table[ent].name = name;
310  hash_table[ent].name_length = length;
311  return 0;
312}
313
/*
 * Decodes the 16-bit little-endian value stored in src[0] (low byte) and
 * src[1] (high byte).
 */
static uint16_t get2LE(const uint8_t* src) {
  const uint16_t low = src[0];
  const uint16_t high = src[1];
  return (high << 8) | low;
}
320
/*
 * Decodes the 32-bit little-endian value stored in src[0] (lowest byte)
 * through src[3] (highest byte).
 *
 * Every byte is widened to uint32_t before it is shifted: a plain uint8_t
 * promotes to (signed) int, and shifting a byte >= 0x80 left by 24 would
 * overflow that int, which is undefined behaviour.
 */
static uint32_t get4LE(const uint8_t* src) {
  return (uint32_t) src[0] |
         ((uint32_t) src[1] << 8) |
         ((uint32_t) src[2] << 16) |
         ((uint32_t) src[3] << 24);
}
334
335static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
336                                    ZipArchive* archive, off64_t file_length,
337                                    uint32_t read_amount, uint8_t* scan_buffer) {
338  const off64_t search_start = file_length - read_amount;
339
340  if (lseek64(fd, search_start, SEEK_SET) != search_start) {
341    ALOGW("Zip: seek %" PRId64 " failed: %s", search_start, strerror(errno));
342    return kIoError;
343  }
344  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
345  if (actual != (ssize_t) read_amount) {
346    ALOGW("Zip: read %" PRIu32 " failed: %s", read_amount, strerror(errno));
347    return kIoError;
348  }
349
350  /*
351   * Scan backward for the EOCD magic.  In an archive without a trailing
352   * comment, we'll find it on the first try.  (We may want to consider
353   * doing an initial minimal read; if we don't find it, retry with a
354   * second read as above.)
355   */
356  int i;
357  for (i = read_amount - kEOCDLen; i >= 0; i--) {
358    if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
359      ALOGV("+++ Found EOCD at buf+%d", i);
360      break;
361    }
362  }
363  if (i < 0) {
364    ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
365    return kInvalidFile;
366  }
367
368  const off64_t eocd_offset = search_start + i;
369  const uint8_t* eocd_ptr = scan_buffer + i;
370
371  assert(eocd_offset < file_length);
372
373  /*
374   * Grab the CD offset and size, and the number of entries in the
375   * archive.  Verify that they look reasonable. Widen dir_size and
376   * dir_offset to the file offset type.
377   */
378  const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
379  const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
380  const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
381
382  if (dir_offset + dir_size > eocd_offset) {
383    ALOGW("Zip: bad offsets (dir %" PRId64 ", size %" PRId64 ", eocd %" PRId64 ")",
384        dir_offset, dir_size, eocd_offset);
385    return kInvalidOffset;
386  }
387  if (num_entries == 0) {
388    ALOGW("Zip: empty archive?");
389    return kEmptyArchive;
390  }
391
392  ALOGV("+++ num_entries=%d dir_size=%" PRId64 " dir_offset=%" PRId64,
393        num_entries, dir_size, dir_offset);
394
395  /*
396   * It all looks good.  Create a mapping for the CD, and set the fields
397   * in archive.
398   */
399  android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size,
400                                         true /* read only */, debug_file_name);
401  if (map == NULL) {
402    archive->directory_map = NULL;
403    return kMmapFailed;
404  }
405
406  archive->directory_map = map;
407  archive->num_entries = num_entries;
408  archive->directory_offset = dir_offset;
409
410  return 0;
411}
412
413/*
414 * Find the zip Central Directory and memory-map it.
415 *
416 * On success, returns 0 after populating fields from the EOCD area:
417 *   directory_offset
418 *   directory_map
419 *   num_entries
420 */
421static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
422                                   ZipArchive* archive) {
423
424  // Test file length. We use lseek64 to make sure the file
425  // is small enough to be a zip file (Its size must be less than
426  // 0xffffffff bytes).
427  off64_t file_length = lseek64(fd, 0, SEEK_END);
428  if (file_length == -1) {
429    ALOGV("Zip: lseek on fd %d failed", fd);
430    return kInvalidFile;
431  }
432
433  if (file_length > (off64_t) 0xffffffff) {
434    ALOGV("Zip: zip file too long %" PRId64, (int64_t)file_length);
435    return kInvalidFile;
436  }
437
438  if (file_length < (int64_t) kEOCDLen) {
439    ALOGV("Zip: length %" PRId64 " is too small to be zip", (int64_t)file_length);
440    return kInvalidFile;
441  }
442
443  /*
444   * Perform the traditional EOCD snipe hunt.
445   *
446   * We're searching for the End of Central Directory magic number,
447   * which appears at the start of the EOCD block.  It's followed by
448   * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
449   * need to read the last part of the file into a buffer, dig through
450   * it to find the magic number, parse some values out, and use those
451   * to determine the extent of the CD.
452   *
453   * We start by pulling in the last part of the file.
454   */
455  uint32_t read_amount = kMaxEOCDSearch;
456  if (file_length < (off64_t) read_amount) {
457    read_amount = file_length;
458  }
459
460  uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
461  int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
462                                        file_length, read_amount, scan_buffer);
463
464  free(scan_buffer);
465  return result;
466}
467
468/*
469 * Parses the Zip archive's Central Directory.  Allocates and populates the
470 * hash table.
471 *
472 * Returns 0 on success.
473 */
474static int32_t ParseZipArchive(ZipArchive* archive) {
475  int32_t result = -1;
476  const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
477  size_t cd_length = archive->directory_map->getDataLength();
478  uint16_t num_entries = archive->num_entries;
479
480  /*
481   * Create hash table.  We have a minimum 75% load factor, possibly as
482   * low as 50% after we round off to a power of 2.  There must be at
483   * least one unused entry to avoid an infinite loop during creation.
484   */
485  archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
486  archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
487      sizeof(ZipEntryName));
488
489  /*
490   * Walk through the central directory, adding entries to the hash
491   * table and verifying values.
492   */
493  const uint8_t* ptr = cd_ptr;
494  for (uint16_t i = 0; i < num_entries; i++) {
495    if (get4LE(ptr) != kCDESignature) {
496      ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
497      goto bail;
498    }
499
500    if (ptr + kCDELen > cd_ptr + cd_length) {
501      ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
502      goto bail;
503    }
504
505    const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
506    if (local_header_offset >= archive->directory_offset) {
507      ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16, (int64_t)local_header_offset, i);
508      goto bail;
509    }
510
511    const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
512    const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
513    const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
514
515    /* add the CDE filename to the hash table */
516    const int add_result = AddToHash(archive->hash_table,
517        archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
518    if (add_result) {
519      ALOGW("Zip: Error adding entry to hash table %d", add_result);
520      result = add_result;
521      goto bail;
522    }
523
524    ptr += kCDELen + file_name_length + extra_length + comment_length;
525    if ((size_t)(ptr - cd_ptr) > cd_length) {
526      ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
527          ptr - cd_ptr, cd_length, i);
528      goto bail;
529    }
530  }
531  ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
532
533  result = 0;
534
535bail:
536  return result;
537}
538
539static int32_t OpenArchiveInternal(ZipArchive* archive,
540                                   const char* debug_file_name) {
541  int32_t result = -1;
542  if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
543    return result;
544  }
545
546  if ((result = ParseZipArchive(archive))) {
547    return result;
548  }
549
550  return 0;
551}
552
553int32_t OpenArchiveFd(int fd, const char* debug_file_name,
554                      ZipArchiveHandle* handle) {
555  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
556  memset(archive, 0, sizeof(*archive));
557  *handle = archive;
558
559  archive->fd = fd;
560
561  return OpenArchiveInternal(archive, debug_file_name);
562}
563
564int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
565  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
566  memset(archive, 0, sizeof(*archive));
567  *handle = archive;
568
569  const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
570  if (fd < 0) {
571    ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
572    return kIoError;
573  } else {
574    archive->fd = fd;
575  }
576
577  return OpenArchiveInternal(archive, fileName);
578}
579
580/*
581 * Close a ZipArchive, closing the file and freeing the contents.
582 */
583void CloseArchive(ZipArchiveHandle handle) {
584  ZipArchive* archive = (ZipArchive*) handle;
585  ALOGV("Closing archive %p", archive);
586
587  if (archive->fd >= 0) {
588    close(archive->fd);
589  }
590
591  if (archive->directory_map != NULL) {
592    archive->directory_map->release();
593  }
594  free(archive->hash_table);
595  free(archive);
596}
597
598static int32_t UpdateEntryFromDataDescriptor(int fd,
599                                             ZipEntry *entry) {
600  uint8_t ddBuf[kDDMaxLen];
601  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
602  if (actual != sizeof(ddBuf)) {
603    return kIoError;
604  }
605
606  const uint32_t ddSignature = get4LE(ddBuf);
607  uint16_t ddOffset = 0;
608  if (ddSignature == kDDOptSignature) {
609    ddOffset = 4;
610  }
611
612  entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
613  entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
614  entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
615
616  return 0;
617}
618
619// Attempts to read |len| bytes into |buf| at offset |off|.
620//
621// This method uses pread64 on platforms that support it and
622// lseek64 + read on platforms that don't. This implies that
623// callers should not rely on the |fd| offset being incremented
624// as a side effect of this call.
625static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
626                                   off64_t off) {
627#ifdef HAVE_PREAD
628  return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
629#else
630  // The only supported platform that doesn't support pread at the moment
631  // is Windows. Only recent versions of windows support unix like forks,
632  // and even there the semantics are quite different.
633  if (lseek64(fd, off, SEEK_SET) != off) {
634    ALOGW("Zip: failed seek to offset %" PRId64, off);
635    return kIoError;
636  }
637
638  return TEMP_FAILURE_RETRY(read(fd, buf, len));
639#endif  // HAVE_PREAD
640}
641
642static int32_t FindEntry(const ZipArchive* archive, const int ent,
643                         ZipEntry* data) {
644  const uint16_t nameLen = archive->hash_table[ent].name_length;
645  const char* name = archive->hash_table[ent].name;
646
647  // Recover the start of the central directory entry from the filename
648  // pointer.  The filename is the first entry past the fixed-size data,
649  // so we can just subtract back from that.
650  const unsigned char* ptr = (const unsigned char*) name;
651  ptr -= kCDELen;
652
653  // This is the base of our mmapped region, we have to sanity check that
654  // the name that's in the hash table is a pointer to a location within
655  // this mapped region.
656  const unsigned char* base_ptr = (const unsigned char*)
657    archive->directory_map->getDataPtr();
658  if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
659    ALOGW("Zip: Invalid entry pointer");
660    return kInvalidOffset;
661  }
662
663  // The offset of the start of the central directory in the zipfile.
664  // We keep this lying around so that we can sanity check all our lengths
665  // and our per-file structures.
666  const off64_t cd_offset = archive->directory_offset;
667
668  // Fill out the compression method, modification time, crc32
669  // and other interesting attributes from the central directory. These
670  // will later be compared against values from the local file header.
671  data->method = get2LE(ptr + kCDEMethod);
672  data->mod_time = get4LE(ptr + kCDEModWhen);
673  data->crc32 = get4LE(ptr + kCDECRC);
674  data->compressed_length = get4LE(ptr + kCDECompLen);
675  data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
676
677  // Figure out the local header offset from the central directory. The
678  // actual file data will begin after the local header and the name /
679  // extra comments.
680  const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
681  if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
682    ALOGW("Zip: bad local hdr offset in zip");
683    return kInvalidOffset;
684  }
685
686  uint8_t lfh_buf[kLFHLen];
687  ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
688                                 local_header_offset);
689  if (actual != sizeof(lfh_buf)) {
690    ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)local_header_offset);
691    return kIoError;
692  }
693
694  if (get4LE(lfh_buf) != kLFHSignature) {
695    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
696        (int64_t)local_header_offset);
697    return kInvalidOffset;
698  }
699
700  // Paranoia: Match the values specified in the local file header
701  // to those specified in the central directory.
702  const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
703  const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
704  const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
705
706  if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
707    const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
708    const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
709    const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
710
711    data->has_data_descriptor = 0;
712    if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
713        || data->crc32 != lfhCrc) {
714      ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
715        ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
716        data->compressed_length, data->uncompressed_length, data->crc32,
717        lfhCompLen, lfhUncompLen, lfhCrc);
718      return kInconsistentInformation;
719    }
720  } else {
721    data->has_data_descriptor = 1;
722  }
723
724  // Check that the local file header name matches the declared
725  // name in the central directory.
726  if (lfhNameLen == nameLen) {
727    const off64_t name_offset = local_header_offset + kLFHLen;
728    if (name_offset + lfhNameLen >= cd_offset) {
729      ALOGW("Zip: Invalid declared length");
730      return kInvalidOffset;
731    }
732
733    uint8_t* name_buf = (uint8_t*) malloc(nameLen);
734    ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
735                                  name_offset);
736
737    if (actual != nameLen) {
738      ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)name_offset);
739      free(name_buf);
740      return kIoError;
741    }
742
743    if (memcmp(name, name_buf, nameLen)) {
744      free(name_buf);
745      return kInconsistentInformation;
746    }
747
748    free(name_buf);
749  } else {
750    ALOGW("Zip: lfh name did not match central directory.");
751    return kInconsistentInformation;
752  }
753
754  const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
755  if (data_offset > cd_offset) {
756    ALOGW("Zip: bad data offset %" PRId64 " in zip", (int64_t)data_offset);
757    return kInvalidOffset;
758  }
759
760  if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
761    ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
762      (int64_t)data_offset, data->compressed_length, (int64_t)cd_offset);
763    return kInvalidOffset;
764  }
765
766  if (data->method == kCompressStored &&
767    (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
768     ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
769       data_offset, data->uncompressed_length, cd_offset);
770     return kInvalidOffset;
771  }
772
773  data->offset = data_offset;
774  return 0;
775}
776
777struct IterationHandle {
778  uint32_t position;
779  const char* prefix;
780  uint16_t prefix_len;
781  ZipArchive* archive;
782};
783
784int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
785  ZipArchive* archive = (ZipArchive *) handle;
786
787  if (archive == NULL || archive->hash_table == NULL) {
788    ALOGW("Zip: Invalid ZipArchiveHandle");
789    return kInvalidHandle;
790  }
791
792  IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
793  cookie->position = 0;
794  cookie->prefix = prefix;
795  cookie->archive = archive;
796  if (prefix != NULL) {
797    cookie->prefix_len = strlen(prefix);
798  }
799
800  *cookie_ptr = cookie ;
801  return 0;
802}
803
804int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
805                  ZipEntry* data) {
806  const ZipArchive* archive = (ZipArchive*) handle;
807  const int nameLen = strlen(entryName);
808  if (nameLen == 0 || nameLen > 65535) {
809    ALOGW("Zip: Invalid filename %s", entryName);
810    return kInvalidEntryName;
811  }
812
813  const int64_t ent = EntryToIndex(archive->hash_table,
814    archive->hash_table_size, entryName, nameLen);
815
816  if (ent < 0) {
817    ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
818    return ent;
819  }
820
821  return FindEntry(archive, ent, data);
822}
823
824int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
825  IterationHandle* handle = (IterationHandle *) cookie;
826  if (handle == NULL) {
827    return kInvalidHandle;
828  }
829
830  ZipArchive* archive = handle->archive;
831  if (archive == NULL || archive->hash_table == NULL) {
832    ALOGW("Zip: Invalid ZipArchiveHandle");
833    return kInvalidHandle;
834  }
835
836  const uint32_t currentOffset = handle->position;
837  const uint32_t hash_table_length = archive->hash_table_size;
838  const ZipEntryName *hash_table = archive->hash_table;
839
840  for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
841    if (hash_table[i].name != NULL &&
842        (handle->prefix == NULL ||
843         (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
844      handle->position = (i + 1);
845      const int error = FindEntry(archive, i, data);
846      if (!error) {
847        name->name = hash_table[i].name;
848        name->name_length = hash_table[i].name_length;
849      }
850
851      return error;
852    }
853  }
854
855  handle->position = 0;
856  return kIterationEnd;
857}
858
859static int32_t InflateToFile(int fd, const ZipEntry* entry,
860                             uint8_t* begin, uint32_t length,
861                             uint64_t* crc_out) {
862  int32_t result = -1;
863  const uint32_t kBufSize = 32768;
864  uint8_t read_buf[kBufSize];
865  uint8_t write_buf[kBufSize];
866  z_stream zstream;
867  int zerr;
868
869  /*
870   * Initialize the zlib stream struct.
871   */
872  memset(&zstream, 0, sizeof(zstream));
873  zstream.zalloc = Z_NULL;
874  zstream.zfree = Z_NULL;
875  zstream.opaque = Z_NULL;
876  zstream.next_in = NULL;
877  zstream.avail_in = 0;
878  zstream.next_out = (Bytef*) write_buf;
879  zstream.avail_out = kBufSize;
880  zstream.data_type = Z_UNKNOWN;
881
882  /*
883   * Use the undocumented "negative window bits" feature to tell zlib
884   * that there's no zlib header waiting for it.
885   */
886  zerr = inflateInit2(&zstream, -MAX_WBITS);
887  if (zerr != Z_OK) {
888    if (zerr == Z_VERSION_ERROR) {
889      ALOGE("Installed zlib is not compatible with linked version (%s)",
890        ZLIB_VERSION);
891    } else {
892      ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
893    }
894
895    return kZlibError;
896  }
897
898  const uint32_t uncompressed_length = entry->uncompressed_length;
899
900  uint32_t compressed_length = entry->compressed_length;
901  uint32_t write_count = 0;
902  do {
903    /* read as much as we can */
904    if (zstream.avail_in == 0) {
905      const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
906      const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
907      if (actual != getSize) {
908        ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
909        result = kIoError;
910        goto z_bail;
911      }
912
913      compressed_length -= getSize;
914
915      zstream.next_in = read_buf;
916      zstream.avail_in = getSize;
917    }
918
919    /* uncompress the data */
920    zerr = inflate(&zstream, Z_NO_FLUSH);
921    if (zerr != Z_OK && zerr != Z_STREAM_END) {
922      ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
923          zerr, zstream.next_in, zstream.avail_in,
924          zstream.next_out, zstream.avail_out);
925      result = kZlibError;
926      goto z_bail;
927    }
928
929    /* write when we're full or when we're done */
930    if (zstream.avail_out == 0 ||
931      (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
932      const size_t write_size = zstream.next_out - write_buf;
933      // The file might have declared a bogus length.
934      if (write_size + write_count > length) {
935        goto z_bail;
936      }
937      memcpy(begin + write_count, write_buf, write_size);
938      write_count += write_size;
939
940      zstream.next_out = write_buf;
941      zstream.avail_out = kBufSize;
942    }
943  } while (zerr == Z_OK);
944
945  assert(zerr == Z_STREAM_END);     /* other errors should've been caught */
946
947  // stream.adler holds the crc32 value for such streams.
948  *crc_out = zstream.adler;
949
950  if (zstream.total_out != uncompressed_length || compressed_length != 0) {
951    ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
952        zstream.total_out, uncompressed_length);
953    result = kInconsistentInformation;
954    goto z_bail;
955  }
956
957  result = 0;
958
959z_bail:
960  inflateEnd(&zstream);    /* free up any allocated structures */
961
962  return result;
963}
964
965int32_t ExtractToMemory(ZipArchiveHandle handle,
966                        ZipEntry* entry, uint8_t* begin, uint32_t size) {
967  ZipArchive* archive = (ZipArchive*) handle;
968  const uint16_t method = entry->method;
969  off64_t data_offset = entry->offset;
970
971  if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
972    ALOGW("Zip: lseek to data at %" PRId64 " failed", (int64_t)data_offset);
973    return kIoError;
974  }
975
976  // this should default to kUnknownCompressionMethod.
977  int32_t return_value = -1;
978  uint64_t crc = 0;
979  if (method == kCompressStored) {
980    return_value = CopyFileToFile(archive->fd, begin, size, &crc);
981  } else if (method == kCompressDeflated) {
982    return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
983  }
984
985  if (!return_value && entry->has_data_descriptor) {
986    return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
987    if (return_value) {
988      return return_value;
989    }
990  }
991
992  // TODO: Fix this check by passing the right flags to inflate2 so that
993  // it calculates the CRC for us.
994  if (entry->crc32 != crc && false) {
995    ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
996    return kInconsistentInformation;
997  }
998
999  return return_value;
1000}
1001
1002int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1003                           ZipEntry* entry, int fd) {
1004  const int32_t declared_length = entry->uncompressed_length;
1005
1006  const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1007  if (current_offset == -1) {
1008    ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1009          strerror(errno));
1010    return kIoError;
1011  }
1012
1013  int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1014  if (result == -1) {
1015    ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1016          (int64_t)(declared_length + current_offset), strerror(errno));
1017    return kIoError;
1018  }
1019
1020  // Don't attempt to map a region of length 0. We still need the
1021  // ftruncate() though, since the API guarantees that we will truncate
1022  // the file to the end of the uncompressed output.
1023  if (declared_length == 0) {
1024      return 0;
1025  }
1026
1027  android::FileMap* map  = MapFileSegment(fd, current_offset, declared_length,
1028                                          false, kTempMappingFileName);
1029  if (map == NULL) {
1030    return kMmapFailed;
1031  }
1032
1033  const int32_t error = ExtractToMemory(handle, entry,
1034                                        reinterpret_cast<uint8_t*>(map->getDataPtr()),
1035                                        map->getDataLength());
1036  map->release();
1037  return error;
1038}
1039
1040const char* ErrorCodeString(int32_t error_code) {
1041  if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1042    return kErrorMessages[error_code * -1];
1043  }
1044
1045  return kErrorMessages[0];
1046}
1047
1048int GetFileDescriptor(const ZipArchiveHandle handle) {
1049  return ((ZipArchive*) handle)->fd;
1050}
1051
1052