1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *    http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20
21#include <assert.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <limits.h>
26#include <log/log.h>
27#include <stdlib.h>
28#include <string.h>
29#include <unistd.h>
30#include <utils/Compat.h>
31#include <utils/FileMap.h>
32#include <zlib.h>
33
34#include <JNIHelp.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
35
36#include "ziparchive/zip_archive.h"
37
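// Windows distinguishes between text and binary mode when opening files.
// A zip archive must be opened in binary mode (O_BINARY), otherwise the
// bytes we read may not match the bytes on disk. Platforms without that
// distinction don't define O_BINARY, so give it a no-op value there.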
40#ifndef O_BINARY
41#define O_BINARY 0
42#endif
43
// Declares the default constructor, copy constructor and copy assignment
// operator of |TypeName| without defining them. The record structs below
// expand this under a "private:" label so that they are only ever used as
// views onto existing bytes, never constructed or copied.
#define DISALLOW_IMPLICIT_CONSTRUCTORS(TypeName) \
    TypeName(); \
    TypeName(const TypeName&); \
    void operator=(const TypeName&)
48
49// The "end of central directory" (EOCD) record. Each archive
50// contains exactly once such record which appears at the end of
51// the archive. It contains archive wide information like the
52// number of entries in the archive and the offset to the central
53// directory of the offset.
54struct EocdRecord {
55  static const uint32_t kSignature = 0x06054b50;
56
57  // End of central directory signature, should always be
58  // |kSignature|.
59  uint32_t eocd_signature;
60  // The number of the current "disk", i.e, the "disk" that this
61  // central directory is on.
62  //
63  // This implementation assumes that each archive spans a single
64  // disk only. i.e, that disk_num == 1.
65  uint16_t disk_num;
66  // The disk where the central directory starts.
67  //
68  // This implementation assumes that each archive spans a single
69  // disk only. i.e, that cd_start_disk == 1.
70  uint16_t cd_start_disk;
71  // The number of central directory records on this disk.
72  //
73  // This implementation assumes that each archive spans a single
74  // disk only. i.e, that num_records_on_disk == num_records.
75  uint16_t num_records_on_disk;
76  // The total number of central directory records.
77  uint16_t num_records;
78  // The size of the central directory (in bytes).
79  uint32_t cd_size;
80  // The offset of the start of the central directory, relative
81  // to the start of the file.
82  uint32_t cd_start_offset;
83  // Length of the central directory comment.
84  uint16_t comment_length;
85 private:
86  DISALLOW_IMPLICIT_CONSTRUCTORS(EocdRecord);
87} __attribute__((packed));
88
89// A structure representing the fixed length fields for a single
90// record in the central directory of the archive. In addition to
91// the fixed length fields listed here, each central directory
92// record contains a variable length "file_name" and "extra_field"
93// whose lengths are given by |file_name_length| and |extra_field_length|
94// respectively.
95struct CentralDirectoryRecord {
96  static const uint32_t kSignature = 0x02014b50;
97
98  // The start of record signature. Must be |kSignature|.
99  uint32_t record_signature;
100  // Tool version. Ignored by this implementation.
101  uint16_t version_made_by;
102  // Tool version. Ignored by this implementation.
103  uint16_t version_needed;
104  // The "general purpose bit flags" for this entry. The only
105  // flag value that we currently check for is the "data descriptor"
106  // flag.
107  uint16_t gpb_flags;
108  // The compression method for this entry, one of |kCompressStored|
109  // and |kCompressDeflated|.
110  uint16_t compression_method;
111  // The file modification time and date for this entry.
112  uint16_t last_mod_time;
113  uint16_t last_mod_date;
114  // The CRC-32 checksum for this entry.
115  uint32_t crc32;
116  // The compressed size (in bytes) of this entry.
117  uint32_t compressed_size;
118  // The uncompressed size (in bytes) of this entry.
119  uint32_t uncompressed_size;
120  // The length of the entry file name in bytes. The file name
121  // will appear immediately after this record.
122  uint16_t file_name_length;
123  // The length of the extra field info (in bytes). This data
124  // will appear immediately after the entry file name.
125  uint16_t extra_field_length;
126  // The length of the entry comment (in bytes). This data will
127  // appear immediately after the extra field.
128  uint16_t comment_length;
129  // The start disk for this entry. Ignored by this implementation).
130  uint16_t file_start_disk;
131  // File attributes. Ignored by this implementation.
132  uint16_t internal_file_attributes;
133  // File attributes. Ignored by this implementation.
134  uint32_t external_file_attributes;
135  // The offset to the local file header for this entry, from the
136  // beginning of this archive.
137  uint32_t local_file_header_offset;
138 private:
139  DISALLOW_IMPLICIT_CONSTRUCTORS(CentralDirectoryRecord);
140} __attribute__((packed));
141
142// The local file header for a given entry. This duplicates information
143// present in the central directory of the archive. It is an error for
144// the information here to be different from the central directory
145// information for a given entry.
146struct LocalFileHeader {
147  static const uint32_t kSignature = 0x04034b50;
148
149  // The local file header signature, must be |kSignature|.
150  uint32_t lfh_signature;
151  // Tool version. Ignored by this implementation.
152  uint16_t version_needed;
153  // The "general purpose bit flags" for this entry. The only
154  // flag value that we currently check for is the "data descriptor"
155  // flag.
156  uint16_t gpb_flags;
157  // The compression method for this entry, one of |kCompressStored|
158  // and |kCompressDeflated|.
159  uint16_t compression_method;
160  // The file modification time and date for this entry.
161  uint16_t last_mod_time;
162  uint16_t last_mod_date;
163  // The CRC-32 checksum for this entry.
164  uint32_t crc32;
165  // The compressed size (in bytes) of this entry.
166  uint32_t compressed_size;
167  // The uncompressed size (in bytes) of this entry.
168  uint32_t uncompressed_size;
169  // The length of the entry file name in bytes. The file name
170  // will appear immediately after this record.
171  uint16_t file_name_length;
172  // The length of the extra field info (in bytes). This data
173  // will appear immediately after the entry file name.
174  uint16_t extra_field_length;
175 private:
176  DISALLOW_IMPLICIT_CONSTRUCTORS(LocalFileHeader);
177} __attribute__((packed));
178
179struct DataDescriptor {
180  // The *optional* data descriptor start signature.
181  static const uint32_t kOptSignature = 0x08074b50;
182
183  // CRC-32 checksum of the entry.
184  uint32_t crc32;
185  // Compressed size of the entry.
186  uint32_t compressed_size;
187  // Uncompressed size of the entry.
188  uint32_t uncompressed_size;
189 private:
190  DISALLOW_IMPLICIT_CONSTRUCTORS(DataDescriptor);
191} __attribute__((packed));
192
193#undef DISALLOW_IMPLICIT_CONSTRUCTORS
194
195static const uint32_t kGPBDDFlagMask = 0x0008;         // mask value that signifies that the entry has a DD
196static const uint32_t kMaxErrorLen = 1024;
197
198// The maximum size of a central directory or a file
199// comment in bytes.
200static const uint32_t kMaxCommentLen = 65535;
201
202// The maximum number of bytes to scan backwards for the EOCD start.
203static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
204
205static const char* kErrorMessages[] = {
206  "Unknown return code.",
207  "Iteration ended",
208  "Zlib error",
209  "Invalid file",
210  "Invalid handle",
211  "Duplicate entries in archive",
212  "Empty archive",
213  "Entry not found",
214  "Invalid offset",
215  "Inconsistent information",
216  "Invalid entry name",
217  "I/O Error",
218  "File mapping failed"
219};
220
221static const int32_t kErrorMessageUpperBound = 0;
222
223static const int32_t kIterationEnd = -1;
224
225// We encountered a Zlib error when inflating a stream from this file.
226// Usually indicates file corruption.
227static const int32_t kZlibError = -2;
228
229// The input file cannot be processed as a zip archive. Usually because
230// it's too small, too large or does not have a valid signature.
231static const int32_t kInvalidFile = -3;
232
233// An invalid iteration / ziparchive handle was passed in as an input
234// argument.
235static const int32_t kInvalidHandle = -4;
236
237// The zip archive contained two (or possibly more) entries with the same
238// name.
239static const int32_t kDuplicateEntry = -5;
240
241// The zip archive contains no entries.
242static const int32_t kEmptyArchive = -6;
243
244// The specified entry was not found in the archive.
245static const int32_t kEntryNotFound = -7;
246
247// The zip archive contained an invalid local file header pointer.
248static const int32_t kInvalidOffset = -8;
249
250// The zip archive contained inconsistent entry information. This could
251// be because the central directory & local file header did not agree, or
252// if the actual uncompressed length or crc32 do not match their declared
253// values.
254static const int32_t kInconsistentInformation = -9;
255
256// An invalid entry name was encountered.
257static const int32_t kInvalidEntryName = -10;
258
259// An I/O related system call (read, lseek, ftruncate, map) failed.
260static const int32_t kIoError = -11;
261
262// We were not able to mmap the central directory or entry contents.
263static const int32_t kMmapFailed = -12;
264
265static const int32_t kErrorMessageLowerBound = -13;
266
267static const char kTempMappingFileName[] = "zip: ExtractFileToFile";
268
269/*
270 * A Read-only Zip archive.
271 *
272 * We want "open" and "find entry by name" to be fast operations, and
273 * we want to use as little memory as possible.  We memory-map the zip
274 * central directory, and load a hash table with pointers to the filenames
275 * (which aren't null-terminated).  The other fields are at a fixed offset
276 * from the filename, so we don't need to extract those (but we do need
277 * to byte-read and endian-swap them every time we want them).
278 *
279 * It's possible that somebody has handed us a massive (~1GB) zip archive,
280 * so we can't expect to mmap the entire file.
281 *
282 * To speed comparisons when doing a lookup by name, we could make the mapping
283 * "private" (copy-on-write) and null-terminate the filenames after verifying
284 * the record structure.  However, this requires a private mapping of
285 * every page that the Central Directory touches.  Easier to tuck a copy
286 * of the string length into the hash table entry.
287 */
288struct ZipArchive {
289  /* open Zip archive */
290  const int fd;
291
292  /* mapped central directory area */
293  off64_t directory_offset;
294  android::FileMap* directory_map;
295
296  /* number of entries in the Zip archive */
297  uint16_t num_entries;
298
299  /*
300   * We know how many entries are in the Zip archive, so we can have a
301   * fixed-size hash table. We define a load factor of 0.75 and overallocat
302   * so the maximum number entries can never be higher than
303   * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
304   */
305  uint32_t hash_table_size;
306  ZipEntryName* hash_table;
307
308  ZipArchive(const int fd) :
309      fd(fd),
310      directory_offset(0),
311      directory_map(NULL),
312      num_entries(0),
313      hash_table_size(0),
314      hash_table(NULL) {}
315
316  ~ZipArchive() {
317    if (fd >= 0) {
318      close(fd);
319    }
320
321    if (directory_map != NULL) {
322      directory_map->release();
323    }
324    free(hash_table);
325  }
326};
327
328// Returns 0 on success and negative values on failure.
329static android::FileMap* MapFileSegment(const int fd, const off64_t start,
330                                        const size_t length, const bool read_only,
331                                        const char* debug_file_name) {
332  android::FileMap* file_map = new android::FileMap;
333  const bool success = file_map->create(debug_file_name, fd, start, length, read_only);
334  if (!success) {
335    file_map->release();
336    return NULL;
337  }
338
339  return file_map;
340}
341
342static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
343  static const uint32_t kBufSize = 32768;
344  uint8_t buf[kBufSize];
345
346  uint32_t count = 0;
347  uint64_t crc = 0;
348  while (count < length) {
349    uint32_t remaining = length - count;
350
351    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
352    // value.
353    ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
354    ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
355
356    if (actual != get_size) {
357      ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size);
358      return kIoError;
359    }
360
361    memcpy(begin + count, buf, get_size);
362    crc = crc32(crc, buf, get_size);
363    count += get_size;
364  }
365
366  *crc_out = crc;
367
368  return 0;
369}
370
/*
 * Rounds |val| up to the nearest power of 2 that is >= |val|.
 *
 * Works by smearing the highest set bit of (val - 1) into every lower
 * bit position and then adding one. Inputs of 0 and inputs above 2^31
 * wrap around and yield 0.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  val -= 1;
  // Shift distances 1, 2, 4, 8 and 16 cover all 32 bits.
  for (unsigned int shift = 1; shift < 32; shift <<= 1) {
    val |= val >> shift;
  }

  return val + 1;
}
387
// Hashes the first |len| bytes of |str| (which need not be
// null-terminated) with the multiply-by-31 scheme: each byte is added
// after the running value has been multiplied by 31. Arithmetic wraps
// around at 32 bits. An empty name hashes to 0.
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t accumulated = 0;

  for (uint16_t pos = 0; pos < len; ++pos) {
    accumulated = accumulated * 31 + str[pos];
  }

  return accumulated;
}
397
398/*
399 * Convert a ZipEntry to a hash table index, verifying that it's in a
400 * valid range.
401 */
402static int64_t EntryToIndex(const ZipEntryName* hash_table,
403                            const uint32_t hash_table_size,
404                            const char* name, uint16_t length) {
405  const uint32_t hash = ComputeHash(name, length);
406
407  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
408  uint32_t ent = hash & (hash_table_size - 1);
409  while (hash_table[ent].name != NULL) {
410    if (hash_table[ent].name_length == length &&
411        memcmp(hash_table[ent].name, name, length) == 0) {
412      return ent;
413    }
414
415    ent = (ent + 1) & (hash_table_size - 1);
416  }
417
418  ALOGV("Zip: Unable to find entry %.*s", length, name);
419  return kEntryNotFound;
420}
421
422/*
423 * Add a new entry to the hash table.
424 */
425static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
426                         const char* name, uint16_t length) {
427  const uint64_t hash = ComputeHash(name, length);
428  uint32_t ent = hash & (hash_table_size - 1);
429
430  /*
431   * We over-allocated the table, so we're guaranteed to find an empty slot.
432   * Further, we guarantee that the hashtable size is not 0.
433   */
434  while (hash_table[ent].name != NULL) {
435    if (hash_table[ent].name_length == length &&
436        memcmp(hash_table[ent].name, name, length) == 0) {
437      // We've found a duplicate entry. We don't accept it
438      ALOGW("Zip: Found duplicate entry %.*s", length, name);
439      return kDuplicateEntry;
440    }
441    ent = (ent + 1) & (hash_table_size - 1);
442  }
443
444  hash_table[ent].name = name;
445  hash_table[ent].name_length = length;
446  return 0;
447}
448
449static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
450                                    ZipArchive* archive, off64_t file_length,
451                                    off64_t read_amount, uint8_t* scan_buffer) {
452  const off64_t search_start = file_length - read_amount;
453
454  if (lseek64(fd, search_start, SEEK_SET) != search_start) {
455    ALOGW("Zip: seek %" PRId64 " failed: %s", static_cast<int64_t>(search_start),
456          strerror(errno));
457    return kIoError;
458  }
459  ssize_t actual = TEMP_FAILURE_RETRY(
460      read(fd, scan_buffer, static_cast<size_t>(read_amount)));
461  if (actual != static_cast<ssize_t>(read_amount)) {
462    ALOGW("Zip: read %" PRId64 " failed: %s", static_cast<int64_t>(read_amount),
463          strerror(errno));
464    return kIoError;
465  }
466
467  /*
468   * Scan backward for the EOCD magic.  In an archive without a trailing
469   * comment, we'll find it on the first try.  (We may want to consider
470   * doing an initial minimal read; if we don't find it, retry with a
471   * second read as above.)
472   */
473  int i = read_amount - sizeof(EocdRecord);
474  for (; i >= 0; i--) {
475    if (scan_buffer[i] == 0x50 &&
476        ((*reinterpret_cast<uint32_t*>(&scan_buffer[i])) == EocdRecord::kSignature)) {
477      ALOGV("+++ Found EOCD at buf+%d", i);
478      break;
479    }
480  }
481  if (i < 0) {
482    ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
483    return kInvalidFile;
484  }
485
486  const off64_t eocd_offset = search_start + i;
487  const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
488  /*
489   * Verify that there's no trailing space at the end of the central directory
490   * and its comment.
491   */
492  const off64_t calculated_length = eocd_offset + sizeof(EocdRecord)
493      + eocd->comment_length;
494  if (calculated_length != file_length) {
495    ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
496          static_cast<int64_t>(file_length - calculated_length));
497    return kInvalidFile;
498  }
499
500  /*
501   * Grab the CD offset and size, and the number of entries in the
502   * archive and verify that they look reasonable.
503   */
504  if (eocd->cd_start_offset + eocd->cd_size > eocd_offset) {
505    ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
506        eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
507    return kInvalidOffset;
508  }
509  if (eocd->num_records == 0) {
510    ALOGW("Zip: empty archive?");
511    return kEmptyArchive;
512  }
513
514  ALOGV("+++ num_entries=%" PRIu32 "dir_size=%" PRIu32 " dir_offset=%" PRIu32,
515        eocd->num_records, eocd->cd_size, eocd->cd_start_offset);
516
517  /*
518   * It all looks good.  Create a mapping for the CD, and set the fields
519   * in archive.
520   */
521  android::FileMap* map = MapFileSegment(fd,
522      static_cast<off64_t>(eocd->cd_start_offset),
523      static_cast<size_t>(eocd->cd_size),
524      true /* read only */, debug_file_name);
525  if (map == NULL) {
526    archive->directory_map = NULL;
527    return kMmapFailed;
528  }
529
530  archive->directory_map = map;
531  archive->num_entries = eocd->num_records;
532  archive->directory_offset = eocd->cd_start_offset;
533
534  return 0;
535}
536
537/*
538 * Find the zip Central Directory and memory-map it.
539 *
540 * On success, returns 0 after populating fields from the EOCD area:
541 *   directory_offset
542 *   directory_map
543 *   num_entries
544 */
545static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
546                                   ZipArchive* archive) {
547
548  // Test file length. We use lseek64 to make sure the file
549  // is small enough to be a zip file (Its size must be less than
550  // 0xffffffff bytes).
551  off64_t file_length = lseek64(fd, 0, SEEK_END);
552  if (file_length == -1) {
553    ALOGV("Zip: lseek on fd %d failed", fd);
554    return kInvalidFile;
555  }
556
557  if (file_length > (off64_t) 0xffffffff) {
558    ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
559    return kInvalidFile;
560  }
561
562  if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
563    ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
564    return kInvalidFile;
565  }
566
567  /*
568   * Perform the traditional EOCD snipe hunt.
569   *
570   * We're searching for the End of Central Directory magic number,
571   * which appears at the start of the EOCD block.  It's followed by
572   * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
573   * need to read the last part of the file into a buffer, dig through
574   * it to find the magic number, parse some values out, and use those
575   * to determine the extent of the CD.
576   *
577   * We start by pulling in the last part of the file.
578   */
579  off64_t read_amount = kMaxEOCDSearch;
580  if (file_length < read_amount) {
581    read_amount = file_length;
582  }
583
584  uint8_t* scan_buffer = reinterpret_cast<uint8_t*>(malloc(read_amount));
585  int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
586                                        file_length, read_amount, scan_buffer);
587
588  free(scan_buffer);
589  return result;
590}
591
592/*
593 * Parses the Zip archive's Central Directory.  Allocates and populates the
594 * hash table.
595 *
596 * Returns 0 on success.
597 */
598static int32_t ParseZipArchive(ZipArchive* archive) {
599  int32_t result = -1;
600  const uint8_t* const cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr();
601  const size_t cd_length = archive->directory_map->getDataLength();
602  const uint16_t num_entries = archive->num_entries;
603
604  /*
605   * Create hash table.  We have a minimum 75% load factor, possibly as
606   * low as 50% after we round off to a power of 2.  There must be at
607   * least one unused entry to avoid an infinite loop during creation.
608   */
609  archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
610  archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
611      sizeof(ZipEntryName));
612
613  /*
614   * Walk through the central directory, adding entries to the hash
615   * table and verifying values.
616   */
617  const uint8_t* const cd_end = cd_ptr + cd_length;
618  const uint8_t* ptr = cd_ptr;
619  for (uint16_t i = 0; i < num_entries; i++) {
620    const CentralDirectoryRecord* cdr =
621        reinterpret_cast<const CentralDirectoryRecord*>(ptr);
622    if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
623      ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
624      goto bail;
625    }
626
627    if (ptr + sizeof(CentralDirectoryRecord) > cd_end) {
628      ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
629      goto bail;
630    }
631
632    const off64_t local_header_offset = cdr->local_file_header_offset;
633    if (local_header_offset >= archive->directory_offset) {
634      ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16, (int64_t)local_header_offset, i);
635      goto bail;
636    }
637
638    const uint16_t file_name_length = cdr->file_name_length;
639    const uint16_t extra_length = cdr->extra_field_length;
640    const uint16_t comment_length = cdr->comment_length;
641    const char* file_name = reinterpret_cast<const char*>(ptr + sizeof(CentralDirectoryRecord));
642
643    /* check that file name doesn't contain \0 character */
644    if (memchr(file_name, 0, file_name_length) != NULL) {
645      ALOGW("Zip: entry name can't contain \\0 character");
646      goto bail;
647    }
648
649    /* add the CDE filename to the hash table */
650    const int add_result = AddToHash(archive->hash_table,
651        archive->hash_table_size, file_name, file_name_length);
652    if (add_result) {
653      ALOGW("Zip: Error adding entry to hash table %d", add_result);
654      result = add_result;
655      goto bail;
656    }
657
658    ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
659    if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
660      ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
661          ptr - cd_ptr, cd_length, i);
662      goto bail;
663    }
664  }
665  ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
666
667  result = 0;
668
669bail:
670  return result;
671}
672
673static int32_t OpenArchiveInternal(ZipArchive* archive,
674                                   const char* debug_file_name) {
675  int32_t result = -1;
676  if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
677    return result;
678  }
679
680  if ((result = ParseZipArchive(archive))) {
681    return result;
682  }
683
684  return 0;
685}
686
687int32_t OpenArchiveFd(int fd, const char* debug_file_name,
688                      ZipArchiveHandle* handle) {
689  ZipArchive* archive = new ZipArchive(fd);
690  *handle = archive;
691  return OpenArchiveInternal(archive, debug_file_name);
692}
693
694int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
695  const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
696  ZipArchive* archive = new ZipArchive(fd);
697  *handle = archive;
698
699  if (fd < 0) {
700    ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
701    return kIoError;
702  }
703  return OpenArchiveInternal(archive, fileName);
704}
705
706/*
707 * Close a ZipArchive, closing the file and freeing the contents.
708 */
709void CloseArchive(ZipArchiveHandle handle) {
710  ZipArchive* archive = (ZipArchive*) handle;
711  ALOGV("Closing archive %p", archive);
712  delete archive;
713}
714
715static int32_t UpdateEntryFromDataDescriptor(int fd,
716                                             ZipEntry *entry) {
717  uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
718  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
719  if (actual != sizeof(ddBuf)) {
720    return kIoError;
721  }
722
723  const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
724  const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
725  const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
726
727  entry->crc32 = descriptor->crc32;
728  entry->compressed_length = descriptor->compressed_size;
729  entry->uncompressed_length = descriptor->uncompressed_size;
730
731  return 0;
732}
733
734// Attempts to read |len| bytes into |buf| at offset |off|.
735//
736// This method uses pread64 on platforms that support it and
737// lseek64 + read on platforms that don't. This implies that
738// callers should not rely on the |fd| offset being incremented
739// as a side effect of this call.
740static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
741                                   off64_t off) {
742#ifdef HAVE_PREAD
743  return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
744#else
745  // The only supported platform that doesn't support pread at the moment
746  // is Windows. Only recent versions of windows support unix like forks,
747  // and even there the semantics are quite different.
748  if (lseek64(fd, off, SEEK_SET) != off) {
749    ALOGW("Zip: failed seek to offset %" PRId64, off);
750    return kIoError;
751  }
752
753  return TEMP_FAILURE_RETRY(read(fd, buf, len));
754#endif  // HAVE_PREAD
755}
756
757static int32_t FindEntry(const ZipArchive* archive, const int ent,
758                         ZipEntry* data) {
759  const uint16_t nameLen = archive->hash_table[ent].name_length;
760  const char* name = archive->hash_table[ent].name;
761
762  // Recover the start of the central directory entry from the filename
763  // pointer.  The filename is the first entry past the fixed-size data,
764  // so we can just subtract back from that.
765  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(name);
766  ptr -= sizeof(CentralDirectoryRecord);
767
768  // This is the base of our mmapped region, we have to sanity check that
769  // the name that's in the hash table is a pointer to a location within
770  // this mapped region.
771  const uint8_t* base_ptr = reinterpret_cast<const uint8_t*>(
772    archive->directory_map->getDataPtr());
773  if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) {
774    ALOGW("Zip: Invalid entry pointer");
775    return kInvalidOffset;
776  }
777
778  const CentralDirectoryRecord *cdr =
779      reinterpret_cast<const CentralDirectoryRecord*>(ptr);
780
781  // The offset of the start of the central directory in the zipfile.
782  // We keep this lying around so that we can sanity check all our lengths
783  // and our per-file structures.
784  const off64_t cd_offset = archive->directory_offset;
785
786  // Fill out the compression method, modification time, crc32
787  // and other interesting attributes from the central directory. These
788  // will later be compared against values from the local file header.
789  data->method = cdr->compression_method;
790  data->mod_time = cdr->last_mod_time;
791  data->crc32 = cdr->crc32;
792  data->compressed_length = cdr->compressed_size;
793  data->uncompressed_length = cdr->uncompressed_size;
794
795  // Figure out the local header offset from the central directory. The
796  // actual file data will begin after the local header and the name /
797  // extra comments.
798  const off64_t local_header_offset = cdr->local_file_header_offset;
799  if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
800    ALOGW("Zip: bad local hdr offset in zip");
801    return kInvalidOffset;
802  }
803
804  uint8_t lfh_buf[sizeof(LocalFileHeader)];
805  ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
806                                 local_header_offset);
807  if (actual != sizeof(lfh_buf)) {
808    ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)local_header_offset);
809    return kIoError;
810  }
811
812  const LocalFileHeader *lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
813
814  if (lfh->lfh_signature != LocalFileHeader::kSignature) {
815    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
816        static_cast<int64_t>(local_header_offset));
817    return kInvalidOffset;
818  }
819
820  // Paranoia: Match the values specified in the local file header
821  // to those specified in the central directory.
822  if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
823    data->has_data_descriptor = 0;
824    if (data->compressed_length != lfh->compressed_size
825        || data->uncompressed_length != lfh->uncompressed_size
826        || data->crc32 != lfh->crc32) {
827      ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
828        ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
829        data->compressed_length, data->uncompressed_length, data->crc32,
830        lfh->compressed_size, lfh->uncompressed_size, lfh->crc32);
831      return kInconsistentInformation;
832    }
833  } else {
834    data->has_data_descriptor = 1;
835  }
836
837  // Check that the local file header name matches the declared
838  // name in the central directory.
839  if (lfh->file_name_length == nameLen) {
840    const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
841    if (name_offset + lfh->file_name_length >= cd_offset) {
842      ALOGW("Zip: Invalid declared length");
843      return kInvalidOffset;
844    }
845
846    uint8_t* name_buf = (uint8_t*) malloc(nameLen);
847    ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
848                                  name_offset);
849
850    if (actual != nameLen) {
851      ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)name_offset);
852      free(name_buf);
853      return kIoError;
854    }
855
856    if (memcmp(name, name_buf, nameLen)) {
857      free(name_buf);
858      return kInconsistentInformation;
859    }
860
861    free(name_buf);
862  } else {
863    ALOGW("Zip: lfh name did not match central directory.");
864    return kInconsistentInformation;
865  }
866
867  const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader)
868      + lfh->file_name_length + lfh->extra_field_length;
869  if (data_offset > cd_offset) {
870    ALOGW("Zip: bad data offset %" PRId64 " in zip", (int64_t)data_offset);
871    return kInvalidOffset;
872  }
873
874  if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
875    ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
876      (int64_t)data_offset, data->compressed_length, (int64_t)cd_offset);
877    return kInvalidOffset;
878  }
879
880  if (data->method == kCompressStored &&
881    (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
882     ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
883       (int64_t)data_offset, data->uncompressed_length, (int64_t)cd_offset);
884     return kInvalidOffset;
885  }
886
887  data->offset = data_offset;
888  return 0;
889}
890
891struct IterationHandle {
892  uint32_t position;
893  const char* prefix;
894  uint16_t prefix_len;
895  ZipArchive* archive;
896};
897
898int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
899  ZipArchive* archive = (ZipArchive *) handle;
900
901  if (archive == NULL || archive->hash_table == NULL) {
902    ALOGW("Zip: Invalid ZipArchiveHandle");
903    return kInvalidHandle;
904  }
905
906  IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
907  cookie->position = 0;
908  cookie->prefix = prefix;
909  cookie->archive = archive;
910  if (prefix != NULL) {
911    cookie->prefix_len = strlen(prefix);
912  }
913
914  *cookie_ptr = cookie ;
915  return 0;
916}
917
918int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
919                  ZipEntry* data) {
920  const ZipArchive* archive = (ZipArchive*) handle;
921  const int nameLen = strlen(entryName);
922  if (nameLen == 0 || nameLen > 65535) {
923    ALOGW("Zip: Invalid filename %s", entryName);
924    return kInvalidEntryName;
925  }
926
927  const int64_t ent = EntryToIndex(archive->hash_table,
928    archive->hash_table_size, entryName, nameLen);
929
930  if (ent < 0) {
931    ALOGV("Zip: Could not find entry %.*s", nameLen, entryName);
932    return ent;
933  }
934
935  return FindEntry(archive, ent, data);
936}
937
938int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
939  IterationHandle* handle = (IterationHandle *) cookie;
940  if (handle == NULL) {
941    return kInvalidHandle;
942  }
943
944  ZipArchive* archive = handle->archive;
945  if (archive == NULL || archive->hash_table == NULL) {
946    ALOGW("Zip: Invalid ZipArchiveHandle");
947    return kInvalidHandle;
948  }
949
950  const uint32_t currentOffset = handle->position;
951  const uint32_t hash_table_length = archive->hash_table_size;
952  const ZipEntryName *hash_table = archive->hash_table;
953
954  for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
955    if (hash_table[i].name != NULL &&
956        (handle->prefix == NULL ||
957         (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
958      handle->position = (i + 1);
959      const int error = FindEntry(archive, i, data);
960      if (!error) {
961        name->name = hash_table[i].name;
962        name->name_length = hash_table[i].name_length;
963      }
964
965      return error;
966    }
967  }
968
969  handle->position = 0;
970  return kIterationEnd;
971}
972
973static int32_t InflateToFile(int fd, const ZipEntry* entry,
974                             uint8_t* begin, uint32_t length,
975                             uint64_t* crc_out) {
976  int32_t result = -1;
977  const uint32_t kBufSize = 32768;
978  uint8_t read_buf[kBufSize];
979  uint8_t write_buf[kBufSize];
980  z_stream zstream;
981  int zerr;
982
983  /*
984   * Initialize the zlib stream struct.
985   */
986  memset(&zstream, 0, sizeof(zstream));
987  zstream.zalloc = Z_NULL;
988  zstream.zfree = Z_NULL;
989  zstream.opaque = Z_NULL;
990  zstream.next_in = NULL;
991  zstream.avail_in = 0;
992  zstream.next_out = (Bytef*) write_buf;
993  zstream.avail_out = kBufSize;
994  zstream.data_type = Z_UNKNOWN;
995
996  /*
997   * Use the undocumented "negative window bits" feature to tell zlib
998   * that there's no zlib header waiting for it.
999   */
1000  zerr = inflateInit2(&zstream, -MAX_WBITS);
1001  if (zerr != Z_OK) {
1002    if (zerr == Z_VERSION_ERROR) {
1003      ALOGE("Installed zlib is not compatible with linked version (%s)",
1004        ZLIB_VERSION);
1005    } else {
1006      ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
1007    }
1008
1009    return kZlibError;
1010  }
1011
1012  const uint32_t uncompressed_length = entry->uncompressed_length;
1013
1014  uint32_t compressed_length = entry->compressed_length;
1015  uint32_t write_count = 0;
1016  do {
1017    /* read as much as we can */
1018    if (zstream.avail_in == 0) {
1019      const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
1020      const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
1021      if (actual != getSize) {
1022        ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize);
1023        result = kIoError;
1024        goto z_bail;
1025      }
1026
1027      compressed_length -= getSize;
1028
1029      zstream.next_in = read_buf;
1030      zstream.avail_in = getSize;
1031    }
1032
1033    /* uncompress the data */
1034    zerr = inflate(&zstream, Z_NO_FLUSH);
1035    if (zerr != Z_OK && zerr != Z_STREAM_END) {
1036      ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
1037          zerr, zstream.next_in, zstream.avail_in,
1038          zstream.next_out, zstream.avail_out);
1039      result = kZlibError;
1040      goto z_bail;
1041    }
1042
1043    /* write when we're full or when we're done */
1044    if (zstream.avail_out == 0 ||
1045      (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
1046      const size_t write_size = zstream.next_out - write_buf;
1047      // The file might have declared a bogus length.
1048      if (write_size + write_count > length) {
1049        goto z_bail;
1050      }
1051      memcpy(begin + write_count, write_buf, write_size);
1052      write_count += write_size;
1053
1054      zstream.next_out = write_buf;
1055      zstream.avail_out = kBufSize;
1056    }
1057  } while (zerr == Z_OK);
1058
1059  assert(zerr == Z_STREAM_END);     /* other errors should've been caught */
1060
1061  // stream.adler holds the crc32 value for such streams.
1062  *crc_out = zstream.adler;
1063
1064  if (zstream.total_out != uncompressed_length || compressed_length != 0) {
1065    ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
1066        zstream.total_out, uncompressed_length);
1067    result = kInconsistentInformation;
1068    goto z_bail;
1069  }
1070
1071  result = 0;
1072
1073z_bail:
1074  inflateEnd(&zstream);    /* free up any allocated structures */
1075
1076  return result;
1077}
1078
1079int32_t ExtractToMemory(ZipArchiveHandle handle,
1080                        ZipEntry* entry, uint8_t* begin, uint32_t size) {
1081  ZipArchive* archive = (ZipArchive*) handle;
1082  const uint16_t method = entry->method;
1083  off64_t data_offset = entry->offset;
1084
1085  if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
1086    ALOGW("Zip: lseek to data at %" PRId64 " failed", (int64_t)data_offset);
1087    return kIoError;
1088  }
1089
1090  // this should default to kUnknownCompressionMethod.
1091  int32_t return_value = -1;
1092  uint64_t crc = 0;
1093  if (method == kCompressStored) {
1094    return_value = CopyFileToFile(archive->fd, begin, size, &crc);
1095  } else if (method == kCompressDeflated) {
1096    return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
1097  }
1098
1099  if (!return_value && entry->has_data_descriptor) {
1100    return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1101    if (return_value) {
1102      return return_value;
1103    }
1104  }
1105
1106  // TODO: Fix this check by passing the right flags to inflate2 so that
1107  // it calculates the CRC for us.
1108  if (entry->crc32 != crc && false) {
1109    ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
1110    return kInconsistentInformation;
1111  }
1112
1113  return return_value;
1114}
1115
1116int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1117                           ZipEntry* entry, int fd) {
1118  const int32_t declared_length = entry->uncompressed_length;
1119
1120  const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
1121  if (current_offset == -1) {
1122    ALOGW("Zip: unable to seek to current location on fd %d: %s", fd,
1123          strerror(errno));
1124    return kIoError;
1125  }
1126
1127  int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
1128  if (result == -1) {
1129    ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
1130          (int64_t)(declared_length + current_offset), strerror(errno));
1131    return kIoError;
1132  }
1133
1134  // Don't attempt to map a region of length 0. We still need the
1135  // ftruncate() though, since the API guarantees that we will truncate
1136  // the file to the end of the uncompressed output.
1137  if (declared_length == 0) {
1138      return 0;
1139  }
1140
1141  android::FileMap* map  = MapFileSegment(fd, current_offset, declared_length,
1142                                          false, kTempMappingFileName);
1143  if (map == NULL) {
1144    return kMmapFailed;
1145  }
1146
1147  const int32_t error = ExtractToMemory(handle, entry,
1148                                        reinterpret_cast<uint8_t*>(map->getDataPtr()),
1149                                        map->getDataLength());
1150  map->release();
1151  return error;
1152}
1153
1154const char* ErrorCodeString(int32_t error_code) {
1155  if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1156    return kErrorMessages[error_code * -1];
1157  }
1158
1159  return kErrorMessages[0];
1160}
1161
1162int GetFileDescriptor(const ZipArchiveHandle handle) {
1163  return ((ZipArchive*) handle)->fd;
1164}
1165
1166