zip_archive.cc revision 99ef9914be1e39276e2e077670368927a1221921
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *    http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20
21#include <assert.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <limits.h>
26#include <log/log.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30#include <utils/FileMap.h>
31#include <zlib.h>
32
33#include <JNIHelp.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
34
35#include "ziparchive/zip_archive.h"
36
// This is for Windows. If we don't open a file in binary mode, weird
// things will happen.
39#ifndef O_BINARY
40#define O_BINARY 0
41#endif
42
43/*
44 * Zip file constants.
45 */
/*
 * End Of Central Directory (EOCD) record. The fixed part of the record is
 * the 4 byte signature followed by 18 bytes of fields, i.e. 22 bytes; the
 * variable-length archive comment follows it. kEOCDLen must be the full
 * fixed size: it bounds both the minimum file size and the starting point
 * of the backward scan for the signature, and the EOCD fields (up to
 * kEOCDFileOffset + 4) are read relative to the position that scan finds.
 */
static const uint32_t kEOCDSignature    = 0x06054b50;
static const uint32_t kEOCDLen          = 22;             // fixed EOCD size, excluding the comment
static const uint32_t kEOCDNumEntries   = 8;              // offset to #of entries in file
static const uint32_t kEOCDSize         = 12;             // size of the central directory
static const uint32_t kEOCDFileOffset   = 16;             // offset to central directory

static const uint32_t kMaxCommentLen    = 65535;          // longest possible in ushort
static const uint32_t kMaxEOCDSearch    = (kMaxCommentLen + kEOCDLen);

/* Local File Header (LFH) field offsets. */
static const uint32_t kLFHSignature     = 0x04034b50;
static const uint32_t kLFHLen           = 30;             // excluding variable-len fields
static const uint32_t kLFHGPBFlags      = 6;              // general purpose bit flags
static const uint32_t kLFHCRC           = 14;             // offset to CRC
static const uint32_t kLFHCompLen       = 18;             // offset to compressed length
static const uint32_t kLFHUncompLen     = 22;             // offset to uncompressed length
static const uint32_t kLFHNameLen       = 26;             // offset to filename length
static const uint32_t kLFHExtraLen      = 28;             // offset to extra length

/* Central Directory Entry (CDE) field offsets. */
static const uint32_t kCDESignature     = 0x02014b50;
static const uint32_t kCDELen           = 46;             // excluding variable-len fields
static const uint32_t kCDEMethod        = 10;             // offset to compression method
static const uint32_t kCDEModWhen       = 12;             // offset to modification timestamp
static const uint32_t kCDECRC           = 16;             // offset to entry CRC
static const uint32_t kCDECompLen       = 20;             // offset to compressed length
static const uint32_t kCDEUncompLen     = 24;             // offset to uncompressed length
static const uint32_t kCDENameLen       = 28;             // offset to filename length
static const uint32_t kCDEExtraLen      = 30;             // offset to extra length
static const uint32_t kCDECommentLen    = 32;             // offset to comment length
static const uint32_t kCDELocalOffset   = 42;             // offset to local hdr

/* Data Descriptor (DD) layout; offsets are relative to the end of the optional signature. */
static const uint32_t kDDOptSignature   = 0x08074b50;     // *OPTIONAL* data descriptor signature
static const uint32_t kDDSignatureLen   = 4;
static const uint32_t kDDLen            = 12;
static const uint32_t kDDMaxLen         = 16;             // max of 16 bytes with a signature, 12 bytes without
static const uint32_t kDDCrc32          = 0;              // offset to crc32
static const uint32_t kDDCompLen        = 4;              // offset to compressed length
static const uint32_t kDDUncompLen      = 8;              // offset to uncompressed length

static const uint32_t kGPBDDFlagMask    = 0x0008;         // mask value that signifies that the entry has a DD

static const uint32_t kMaxErrorLen = 1024;
87
// Human-readable descriptions of the return codes declared below. The
// strings are listed in the same order as the codes, so the string at
// index i describes return code -i; index 0 holds the "Unknown return
// code." text.
static const char* kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};

// One above the largest named return code (kIterationEnd == -1).
// NOTE(review): presumably an exclusive bound used when mapping a code to
// kErrorMessages; the lookup is not in this part of the file -- confirm.
static const int32_t kErrorMessageUpperBound = 0;

// Returned by Next() once every matching entry has been visited.
static const int32_t kIterationEnd = -1;

// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// We were not able to mmap the central directory or entry contents.
static const int32_t kMmapFailed = -12;

// One below the smallest named return code (kMmapFailed == -12).
static const int32_t kErrorMessageLowerBound = -13;

// Debug name for a file mapping.
// NOTE(review): presumably passed to MapFileSegment when extracting an
// entry to a file; that code is not in this part of the file -- confirm.
static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
151
152/*
153 * A Read-only Zip archive.
154 *
155 * We want "open" and "find entry by name" to be fast operations, and
156 * we want to use as little memory as possible.  We memory-map the zip
157 * central directory, and load a hash table with pointers to the filenames
158 * (which aren't null-terminated).  The other fields are at a fixed offset
159 * from the filename, so we don't need to extract those (but we do need
160 * to byte-read and endian-swap them every time we want them).
161 *
162 * It's possible that somebody has handed us a massive (~1GB) zip archive,
163 * so we can't expect to mmap the entire file.
164 *
165 * To speed comparisons when doing a lookup by name, we could make the mapping
166 * "private" (copy-on-write) and null-terminate the filenames after verifying
167 * the record structure.  However, this requires a private mapping of
168 * every page that the Central Directory touches.  Easier to tuck a copy
169 * of the string length into the hash table entry.
170 */
struct ZipArchive {
  /* open Zip archive; closed by CloseArchive() when it is >= 0 */
  int fd;

  /*
   * mapped central directory area: directory_offset is the position of
   * the central directory within the file, directory_map is the mapping
   * of it (released by CloseArchive())
   */
  off64_t directory_offset;
  android::FileMap* directory_map;

  /* number of entries in the Zip archive */
  uint16_t num_entries;

  /*
   * We know how many entries are in the Zip archive, so we can have a
   * fixed-size hash table. We define a load factor of 0.75 and
   * overallocate, so the maximum number of entries can never be higher
   * than ((4 * UINT16_MAX) / 3 + 1), which can safely fit into a uint32_t.
   *
   * Each used slot points at a filename inside directory_map; unused
   * slots have a NULL name. The table is freed by CloseArchive().
   */
  uint32_t hash_table_size;
  ZipEntryName* hash_table;
};
191
192// Returns 0 on success and negative values on failure.
193static android::FileMap* MapFileSegment(const int fd, const off64_t start,
194                                        const size_t length, const bool read_only,
195                                        const char* debug_file_name) {
196  android::FileMap* file_map = new android::FileMap;
197  const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
198  if (!success) {
199    file_map->release();
200    return NULL;
201  }
202
203  return file_map;
204}
205
206static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
207  static const uint32_t kBufSize = 32768;
208  uint8_t buf[kBufSize];
209
210  uint32_t count = 0;
211  uint64_t crc = 0;
212  while (count < length) {
213    uint32_t remaining = length - count;
214
215    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
216    // value.
217    ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
218    ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
219
220    if (actual != get_size) {
221      ALOGW("CopyFileToFile: copy read failed (%zd vs %zd)", actual, get_size);
222      return kIoError;
223    }
224
225    memcpy(begin + count, buf, get_size);
226    crc = crc32(crc, buf, get_size);
227    count += get_size;
228  }
229
230  *crc_out = crc;
231
232  return 0;
233}
234
/*
 * Returns the smallest power of 2 that is >= val (val itself when it is
 * already a power of 2). An input of 0 yields 0.
 *
 * Bit-smearing trick from
 * http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  // After subtracting one, copy the highest set bit into every lower
  // position (shifts of 1, 2, 4, 8, 16); adding one then carries into the
  // next power of 2.
  uint32_t smeared = val - 1;
  for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }

  return smeared + 1;
}
251
/*
 * Hashes the first |len| bytes of |str| (which need not be
 * null-terminated) with the multiply-by-31 polynomial hash.
 */
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (uint16_t i = 0; i < len; ++i) {
    hash = hash * 31 + str[i];
  }

  return hash;
}
261
262/*
263 * Convert a ZipEntry to a hash table index, verifying that it's in a
264 * valid range.
265 */
266static int64_t EntryToIndex(const ZipEntryName* hash_table,
267                            const uint32_t hash_table_size,
268                            const char* name, uint16_t length) {
269  const uint32_t hash = ComputeHash(name, length);
270
271  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
272  uint32_t ent = hash & (hash_table_size - 1);
273  while (hash_table[ent].name != NULL) {
274    if (hash_table[ent].name_length == length &&
275        memcmp(hash_table[ent].name, name, length) == 0) {
276      return ent;
277    }
278
279    ent = (ent + 1) & (hash_table_size - 1);
280  }
281
282  ALOGV("Zip: Unable to find entry %.*s", length, name);
283  return kEntryNotFound;
284}
285
286/*
287 * Add a new entry to the hash table.
288 */
289static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
290                         const char* name, uint16_t length) {
291  const uint64_t hash = ComputeHash(name, length);
292  uint32_t ent = hash & (hash_table_size - 1);
293
294  /*
295   * We over-allocated the table, so we're guaranteed to find an empty slot.
296   * Further, we guarantee that the hashtable size is not 0.
297   */
298  while (hash_table[ent].name != NULL) {
299    if (hash_table[ent].name_length == length &&
300        memcmp(hash_table[ent].name, name, length) == 0) {
301      // We've found a duplicate entry. We don't accept it
302      ALOGW("Zip: Found duplicate entry %.*s", length, name);
303      return kDuplicateEntry;
304    }
305    ent = (ent + 1) & (hash_table_size - 1);
306  }
307
308  hash_table[ent].name = name;
309  hash_table[ent].name_length = length;
310  return 0;
311}
312
/*
 * Reads an unsigned 16 bit little-endian value from the two bytes at
 * |src|.
 */
static uint16_t get2LE(const uint8_t* src) {
  const uint16_t low = src[0];
  const uint16_t high = src[1];
  return (high << 8) | low;
}
319
/*
 * Reads an unsigned 32 bit little-endian value from the four bytes at
 * |src|.
 *
 * Every byte is widened to uint32_t before it is shifted. A plain uint8_t
 * operand is promoted to (signed) int, and shifting a byte >= 0x80 left
 * by 24 would then shift into the sign bit, which is undefined behaviour.
 */
static uint32_t get4LE(const uint8_t* src) {
  return ((uint32_t) src[0]) |
         ((uint32_t) src[1] << 8) |
         ((uint32_t) src[2] << 16) |
         ((uint32_t) src[3] << 24);
}
333
334static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
335                                    ZipArchive* archive, off64_t file_length,
336                                    uint32_t read_amount, uint8_t* scan_buffer) {
337  const off64_t search_start = file_length - read_amount;
338
339  if (lseek64(fd, search_start, SEEK_SET) != search_start) {
340    ALOGW("Zip: seek %" PRId64 " failed: %s", search_start, strerror(errno));
341    return kIoError;
342  }
343  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
344  if (actual != (ssize_t) read_amount) {
345    ALOGW("Zip: read %u failed: %s", read_amount, strerror(errno));
346    return kIoError;
347  }
348
349  /*
350   * Scan backward for the EOCD magic.  In an archive without a trailing
351   * comment, we'll find it on the first try.  (We may want to consider
352   * doing an initial minimal read; if we don't find it, retry with a
353   * second read as above.)
354   */
355  int i;
356  for (i = read_amount - kEOCDLen; i >= 0; i--) {
357    if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
358      ALOGV("+++ Found EOCD at buf+%d", i);
359      break;
360    }
361  }
362  if (i < 0) {
363    ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
364    return kInvalidFile;
365  }
366
367  const off64_t eocd_offset = search_start + i;
368  const uint8_t* eocd_ptr = scan_buffer + i;
369
370  assert(eocd_offset < file_length);
371
372  /*
373   * Grab the CD offset and size, and the number of entries in the
374   * archive.  Verify that they look reasonable. Widen dir_size and
375   * dir_offset to the file offset type.
376   */
377  const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
378  const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
379  const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
380
381  if (dir_offset + dir_size > eocd_offset) {
382    ALOGW("Zip: bad offsets (dir %" PRId64 ", size %" PRId64 ", eocd %" PRId64 ")",
383        dir_offset, dir_size, eocd_offset);
384    return kInvalidOffset;
385  }
386  if (num_entries == 0) {
387    ALOGW("Zip: empty archive?");
388    return kEmptyArchive;
389  }
390
391  ALOGV("+++ num_entries=%d dir_size=%" PRId64 " dir_offset=%" PRId64,
392        num_entries, dir_size, dir_offset);
393
394  /*
395   * It all looks good.  Create a mapping for the CD, and set the fields
396   * in archive.
397   */
398  android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size,
399                                         true /* read only */, debug_file_name);
400  if (map == NULL) {
401    archive->directory_map = NULL;
402    return kMmapFailed;
403  }
404
405  archive->directory_map = map;
406  archive->num_entries = num_entries;
407  archive->directory_offset = dir_offset;
408
409  return 0;
410}
411
412/*
413 * Find the zip Central Directory and memory-map it.
414 *
415 * On success, returns 0 after populating fields from the EOCD area:
416 *   directory_offset
417 *   directory_map
418 *   num_entries
419 */
420static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
421                                   ZipArchive* archive) {
422
423  // Test file length. We use lseek64 to make sure the file
424  // is small enough to be a zip file (Its size must be less than
425  // 0xffffffff bytes).
426  off64_t file_length = lseek64(fd, 0, SEEK_END);
427  if (file_length == -1) {
428    ALOGV("Zip: lseek on fd %d failed", fd);
429    return kInvalidFile;
430  }
431
432  if (file_length > (off64_t) 0xffffffff) {
433    ALOGV("Zip: zip file too long %" PRId64, file_length);
434    return kInvalidFile;
435  }
436
437  if (file_length < (int64_t) kEOCDLen) {
438    ALOGV("Zip: length %" PRId64 " is too small to be zip", file_length);
439    return kInvalidFile;
440  }
441
442  /*
443   * Perform the traditional EOCD snipe hunt.
444   *
445   * We're searching for the End of Central Directory magic number,
446   * which appears at the start of the EOCD block.  It's followed by
447   * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
448   * need to read the last part of the file into a buffer, dig through
449   * it to find the magic number, parse some values out, and use those
450   * to determine the extent of the CD.
451   *
452   * We start by pulling in the last part of the file.
453   */
454  uint32_t read_amount = kMaxEOCDSearch;
455  if (file_length < (off64_t) read_amount) {
456    read_amount = file_length;
457  }
458
459  uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
460  int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
461                                        file_length, read_amount, scan_buffer);
462
463  free(scan_buffer);
464  return result;
465}
466
467/*
468 * Parses the Zip archive's Central Directory.  Allocates and populates the
469 * hash table.
470 *
471 * Returns 0 on success.
472 */
473static int32_t ParseZipArchive(ZipArchive* archive) {
474  int32_t result = -1;
475  const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
476  size_t cd_length = archive->directory_map->getDataLength();
477  uint16_t num_entries = archive->num_entries;
478
479  /*
480   * Create hash table.  We have a minimum 75% load factor, possibly as
481   * low as 50% after we round off to a power of 2.  There must be at
482   * least one unused entry to avoid an infinite loop during creation.
483   */
484  archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
485  archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
486      sizeof(ZipEntryName));
487
488  /*
489   * Walk through the central directory, adding entries to the hash
490   * table and verifying values.
491   */
492  const uint8_t* ptr = cd_ptr;
493  for (uint16_t i = 0; i < num_entries; i++) {
494    if (get4LE(ptr) != kCDESignature) {
495      ALOGW("Zip: missed a central dir sig (at %d)", i);
496      goto bail;
497    }
498
499    if (ptr + kCDELen > cd_ptr + cd_length) {
500      ALOGW("Zip: ran off the end (at %d)", i);
501      goto bail;
502    }
503
504    const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
505    if (local_header_offset >= archive->directory_offset) {
506      ALOGW("Zip: bad LFH offset %" PRId64 " at entry %d", local_header_offset, i);
507      goto bail;
508    }
509
510    const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
511    const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
512    const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
513
514    /* add the CDE filename to the hash table */
515    const int add_result = AddToHash(archive->hash_table,
516        archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
517    if (add_result) {
518      ALOGW("Zip: Error adding entry to hash table %d", add_result);
519      result = add_result;
520      goto bail;
521    }
522
523    ptr += kCDELen + file_name_length + extra_length + comment_length;
524    if ((size_t)(ptr - cd_ptr) > cd_length) {
525      ALOGW("Zip: bad CD advance (%zu vs %zu) at entry %d",
526        (size_t) (ptr - cd_ptr), cd_length, i);
527      goto bail;
528    }
529  }
530  ALOGV("+++ zip good scan %d entries", num_entries);
531
532  result = 0;
533
534bail:
535  return result;
536}
537
538static int32_t OpenArchiveInternal(ZipArchive* archive,
539                                   const char* debug_file_name) {
540  int32_t result = -1;
541  if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
542    return result;
543  }
544
545  if ((result = ParseZipArchive(archive))) {
546    return result;
547  }
548
549  return 0;
550}
551
552int32_t OpenArchiveFd(int fd, const char* debug_file_name,
553                      ZipArchiveHandle* handle) {
554  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
555  memset(archive, 0, sizeof(*archive));
556  *handle = archive;
557
558  archive->fd = fd;
559
560  return OpenArchiveInternal(archive, debug_file_name);
561}
562
563int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
564  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
565  memset(archive, 0, sizeof(*archive));
566  *handle = archive;
567
568  const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
569  if (fd < 0) {
570    ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
571    return kIoError;
572  } else {
573    archive->fd = fd;
574  }
575
576  return OpenArchiveInternal(archive, fileName);
577}
578
579/*
580 * Close a ZipArchive, closing the file and freeing the contents.
581 */
582void CloseArchive(ZipArchiveHandle handle) {
583  ZipArchive* archive = (ZipArchive*) handle;
584  ALOGV("Closing archive %p", archive);
585
586  if (archive->fd >= 0) {
587    close(archive->fd);
588  }
589
590  if (archive->directory_map != NULL) {
591    archive->directory_map->release();
592  }
593  free(archive->hash_table);
594  free(archive);
595}
596
597static int32_t UpdateEntryFromDataDescriptor(int fd,
598                                             ZipEntry *entry) {
599  uint8_t ddBuf[kDDMaxLen];
600  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
601  if (actual != sizeof(ddBuf)) {
602    return kIoError;
603  }
604
605  const uint32_t ddSignature = get4LE(ddBuf);
606  uint16_t ddOffset = 0;
607  if (ddSignature == kDDOptSignature) {
608    ddOffset = 4;
609  }
610
611  entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
612  entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
613  entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
614
615  return 0;
616}
617
618// Attempts to read |len| bytes into |buf| at offset |off|.
619//
620// This method uses pread64 on platforms that support it and
621// lseek64 + read on platforms that don't. This implies that
622// callers should not rely on the |fd| offset being incremented
623// as a side effect of this call.
624static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
625                                   off64_t off) {
626#ifdef HAVE_PREAD
627  return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
628#else
629  // The only supported platform that doesn't support pread at the moment
630  // is Windows. Only recent versions of windows support unix like forks,
631  // and even there the semantics are quite different.
632  if (lseek64(fd, off, SEEK_SET) != off) {
633    ALOGW("Zip: failed seek to offset %" PRId64, off);
634    return kIoError;
635  }
636
637  return TEMP_FAILURE_RETRY(read(fd, buf, len));
638#endif  // HAVE_PREAD
639}
640
641static int32_t FindEntry(const ZipArchive* archive, const int ent,
642                         ZipEntry* data) {
643  const uint16_t nameLen = archive->hash_table[ent].name_length;
644  const char* name = archive->hash_table[ent].name;
645
646  // Recover the start of the central directory entry from the filename
647  // pointer.  The filename is the first entry past the fixed-size data,
648  // so we can just subtract back from that.
649  const unsigned char* ptr = (const unsigned char*) name;
650  ptr -= kCDELen;
651
652  // This is the base of our mmapped region, we have to sanity check that
653  // the name that's in the hash table is a pointer to a location within
654  // this mapped region.
655  const unsigned char* base_ptr = (const unsigned char*)
656    archive->directory_map->getDataPtr();
657  if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
658    ALOGW("Zip: Invalid entry pointer");
659    return kInvalidOffset;
660  }
661
662  // The offset of the start of the central directory in the zipfile.
663  // We keep this lying around so that we can sanity check all our lengths
664  // and our per-file structures.
665  const off64_t cd_offset = archive->directory_offset;
666
667  // Fill out the compression method, modification time, crc32
668  // and other interesting attributes from the central directory. These
669  // will later be compared against values from the local file header.
670  data->method = get2LE(ptr + kCDEMethod);
671  data->mod_time = get4LE(ptr + kCDEModWhen);
672  data->crc32 = get4LE(ptr + kCDECRC);
673  data->compressed_length = get4LE(ptr + kCDECompLen);
674  data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
675
676  // Figure out the local header offset from the central directory. The
677  // actual file data will begin after the local header and the name /
678  // extra comments.
679  const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
680  if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
681    ALOGW("Zip: bad local hdr offset in zip");
682    return kInvalidOffset;
683  }
684
685  uint8_t lfh_buf[kLFHLen];
686  ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
687                                 local_header_offset);
688  if (actual != sizeof(lfh_buf)) {
689    ALOGW("Zip: failed reading lfh name from offset %" PRId64, local_header_offset);
690    return kIoError;
691  }
692
693  if (get4LE(lfh_buf) != kLFHSignature) {
694    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
695        local_header_offset);
696    return kInvalidOffset;
697  }
698
699  // Paranoia: Match the values specified in the local file header
700  // to those specified in the central directory.
701  const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
702  const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
703  const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
704
705  if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
706    const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
707    const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
708    const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
709
710    data->has_data_descriptor = 0;
711    if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
712        || data->crc32 != lfhCrc) {
713      ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}",
714        data->compressed_length, data->uncompressed_length, data->crc32,
715        lfhCompLen, lfhUncompLen, lfhCrc);
716      return kInconsistentInformation;
717    }
718  } else {
719    data->has_data_descriptor = 1;
720  }
721
722  // Check that the local file header name matches the declared
723  // name in the central directory.
724  if (lfhNameLen == nameLen) {
725    const off64_t name_offset = local_header_offset + kLFHLen;
726    if (name_offset + lfhNameLen >= cd_offset) {
727      ALOGW("Zip: Invalid declared length");
728      return kInvalidOffset;
729    }
730
731    uint8_t* name_buf = (uint8_t*) malloc(nameLen);
732    ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
733                                  name_offset);
734
735    if (actual != nameLen) {
736      ALOGW("Zip: failed reading lfh name from offset %" PRId64, name_offset);
737      free(name_buf);
738      return kIoError;
739    }
740
741    if (memcmp(name, name_buf, nameLen)) {
742      free(name_buf);
743      return kInconsistentInformation;
744    }
745
746    free(name_buf);
747  } else {
748    ALOGW("Zip: lfh name did not match central directory.");
749    return kInconsistentInformation;
750  }
751
752  const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
753  if (data_offset > cd_offset) {
754    ALOGW("Zip: bad data offset %" PRId64 " in zip", data_offset);
755    return kInvalidOffset;
756  }
757
758  if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
759    ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %zd > %" PRId64 ")",
760      data_offset, data->compressed_length, cd_offset);
761    return kInvalidOffset;
762  }
763
764  if (data->method == kCompressStored &&
765    (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
766     ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %d > %" PRId64 ")",
767       data_offset, data->uncompressed_length, cd_offset);
768     return kInvalidOffset;
769  }
770
771  data->offset = data_offset;
772  return 0;
773}
774
// State of one iteration over the entries of an archive. Allocated by
// StartIteration() and handed to the caller as an opaque cookie, which
// Next() casts back to this type.
struct IterationHandle {
  // Hash table index at which the next call to Next() resumes scanning.
  uint32_t position;
  // Optional name prefix to filter on, or NULL for "all entries". Only the
  // pointer is stored, so the string must outlive the iteration.
  const char* prefix;
  // strlen() of |prefix|; only consulted when |prefix| is not NULL.
  uint16_t prefix_len;
  // Archive being iterated over (not owned).
  ZipArchive* archive;
};
781
782int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
783  ZipArchive* archive = (ZipArchive *) handle;
784
785  if (archive == NULL || archive->hash_table == NULL) {
786    ALOGW("Zip: Invalid ZipArchiveHandle");
787    return kInvalidHandle;
788  }
789
790  IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
791  cookie->position = 0;
792  cookie->prefix = prefix;
793  cookie->archive = archive;
794  if (prefix != NULL) {
795    cookie->prefix_len = strlen(prefix);
796  }
797
798  *cookie_ptr = cookie ;
799  return 0;
800}
801
802int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
803                  ZipEntry* data) {
804  const ZipArchive* archive = (ZipArchive*) handle;
805  const int nameLen = strlen(entryName);
806  if (nameLen == 0 || nameLen > 65535) {
807    ALOGW("Zip: Invalid filename %s", entryName);
808    return kInvalidEntryName;
809  }
810
811  const int64_t ent = EntryToIndex(archive->hash_table,
812    archive->hash_table_size, entryName, nameLen);
813
814  if (ent < 0) {
815    ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
816    return ent;
817  }
818
819  return FindEntry(archive, ent, data);
820}
821
822int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
823  IterationHandle* handle = (IterationHandle *) cookie;
824  if (handle == NULL) {
825    return kInvalidHandle;
826  }
827
828  ZipArchive* archive = handle->archive;
829  if (archive == NULL || archive->hash_table == NULL) {
830    ALOGW("Zip: Invalid ZipArchiveHandle");
831    return kInvalidHandle;
832  }
833
834  const uint32_t currentOffset = handle->position;
835  const uint32_t hash_table_length = archive->hash_table_size;
836  const ZipEntryName *hash_table = archive->hash_table;
837
838  for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
839    if (hash_table[i].name != NULL &&
840        (handle->prefix == NULL ||
841         (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
842      handle->position = (i + 1);
843      const int error = FindEntry(archive, i, data);
844      if (!error) {
845        name->name = hash_table[i].name;
846        name->name_length = hash_table[i].name_length;
847      }
848
849      return error;
850    }
851  }
852
853  handle->position = 0;
854  return kIterationEnd;
855}
856
857static int32_t InflateToFile(int fd, const ZipEntry* entry,
858                             uint8_t* begin, uint32_t length,
859                             uint64_t* crc_out) {
860  int32_t result = -1;
861  const uint32_t kBufSize = 32768;
862  uint8_t read_buf[kBufSize];
863  uint8_t write_buf[kBufSize];
864  z_stream zstream;
865  int zerr;
866
867  /*
868   * Initialize the zlib stream struct.
869   */
870  memset(&zstream, 0, sizeof(zstream));
871  zstream.zalloc = Z_NULL;
872  zstream.zfree = Z_NULL;
873  zstream.opaque = Z_NULL;
874  zstream.next_in = NULL;
875  zstream.avail_in = 0;
876  zstream.next_out = (Bytef*) write_buf;
877  zstream.avail_out = kBufSize;
878  zstream.data_type = Z_UNKNOWN;
879
880  /*
881   * Use the undocumented "negative window bits" feature to tell zlib
882   * that there's no zlib header waiting for it.
883   */
884  zerr = inflateInit2(&zstream, -MAX_WBITS);
885  if (zerr != Z_OK) {
886    if (zerr == Z_VERSION_ERROR) {
887      ALOGE("Installed zlib is not compatible with linked version (%s)",
888        ZLIB_VERSION);
889    } else {
890      ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
891    }
892
893    return kZlibError;
894  }
895
896  const uint32_t uncompressed_length = entry->uncompressed_length;
897
898  uint32_t compressed_length = entry->compressed_length;
899  uint32_t write_count = 0;
900  do {
901    /* read as much as we can */
902    if (zstream.avail_in == 0) {
903      const ssize_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
904      const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
905      if (actual != getSize) {
906        ALOGW("Zip: inflate read failed (%zd vs %zd)", actual, getSize);
907        result = kIoError;
908        goto z_bail;
909      }
910
911      compressed_length -= getSize;
912
913      zstream.next_in = read_buf;
914      zstream.avail_in = getSize;
915    }
916
917    /* uncompress the data */
918    zerr = inflate(&zstream, Z_NO_FLUSH);
919    if (zerr != Z_OK && zerr != Z_STREAM_END) {
920      ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
921          zerr, zstream.next_in, zstream.avail_in,
922          zstream.next_out, zstream.avail_out);
923      result = kZlibError;
924      goto z_bail;
925    }
926
927    /* write when we're full or when we're done */
928    if (zstream.avail_out == 0 ||
929      (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
930      const size_t write_size = zstream.next_out - write_buf;
931      // The file might have declared a bogus length.
932      if (write_size + write_count > length) {
933        goto z_bail;
934      }
935      memcpy(begin + write_count, write_buf, write_size);
936      write_count += write_size;
937
938      zstream.next_out = write_buf;
939      zstream.avail_out = kBufSize;
940    }
941  } while (zerr == Z_OK);
942
943  assert(zerr == Z_STREAM_END);     /* other errors should've been caught */
944
945  // stream.adler holds the crc32 value for such streams.
946  *crc_out = zstream.adler;
947
948  if (zstream.total_out != uncompressed_length || compressed_length != 0) {
949    ALOGW("Zip: size mismatch on inflated file (%ld vs %u)",
950        zstream.total_out, uncompressed_length);
951    result = kInconsistentInformation;
952    goto z_bail;
953  }
954
955  result = 0;
956
957z_bail:
958  inflateEnd(&zstream);    /* free up any allocated structures */
959
960  return result;
961}
962
963int32_t ExtractToMemory(ZipArchiveHandle handle,
964                        ZipEntry* entry, uint8_t* begin, uint32_t size) {
965  ZipArchive* archive = (ZipArchive*) handle;
966  const uint16_t method = entry->method;
967  off64_t data_offset = entry->offset;
968
969  if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
970    ALOGW("Zip: lseek to data at %" PRId64 " failed", data_offset);
971    return kIoError;
972  }
973
974  // this should default to kUnknownCompressionMethod.
975  int32_t return_value = -1;
976  uint64_t crc = 0;
977  if (method == kCompressStored) {
978    return_value = CopyFileToFile(archive->fd, begin, size, &crc);
979  } else if (method == kCompressDeflated) {
980    return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
981  }
982
983  if (!return_value && entry->has_data_descriptor) {
984    return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
985    if (return_value) {
986      return return_value;
987    }
988  }
989
990  // TODO: Fix this check by passing the right flags to inflate2 so that
991  // it calculates the CRC for us.
992  if (entry->crc32 != crc && false) {
993    ALOGW("Zip: crc mismatch: expected %u, was %" PRIu64, entry->crc32, crc);
994    return kInconsistentInformation;
995  }
996
997  return return_value;
998}
999
1000int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1001                           ZipEntry* entry, int fd) {
1002  const int32_t declared_length = entry->uncompressed_length;
1003
1004  const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1005  if (current_offset == -1) {
1006    ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1007          strerror(errno));
1008    return kIoError;
1009  }
1010
1011  int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1012  if (result == -1) {
1013    ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1014          declared_length + current_offset, strerror(errno));
1015    return kIoError;
1016  }
1017
1018  // Don't attempt to map a region of length 0. We still need the
1019  // ftruncate() though, since the API guarantees that we will truncate
1020  // the file to the end of the uncompressed output.
1021  if (declared_length == 0) {
1022      return 0;
1023  }
1024
1025  android::FileMap* map  = MapFileSegment(fd, current_offset, declared_length,
1026                                          false, kTempMappingFileName);
1027  if (map == NULL) {
1028    return kMmapFailed;
1029  }
1030
1031  const int32_t error = ExtractToMemory(handle, entry,
1032                                        reinterpret_cast<uint8_t*>(map->getDataPtr()),
1033                                        map->getDataLength());
1034  map->release();
1035  return error;
1036}
1037
1038const char* ErrorCodeString(int32_t error_code) {
1039  if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1040    return kErrorMessages[error_code * -1];
1041  }
1042
1043  return kErrorMessages[0];
1044}
1045
1046int GetFileDescriptor(const ZipArchiveHandle handle) {
1047  return ((ZipArchive*) handle)->fd;
1048}
1049
1050