1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *    http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20
21#include <assert.h>
22#include <errno.h>
23#include <fcntl.h>
24#include <inttypes.h>
25#include <limits.h>
26#include <stdlib.h>
27#include <string.h>
28#include <unistd.h>
29
30#include <memory>
31#include <vector>
32
33#include "android-base/file.h"
34#include "android-base/macros.h"  // TEMP_FAILURE_RETRY may or may not be in unistd
35#include "android-base/memory.h"
36#include "log/log.h"
37#include "utils/Compat.h"
38#include "utils/FileMap.h"
39#include "ziparchive/zip_archive.h"
40#include "zlib.h"
41
42#include "entry_name_utils-inl.h"
43#include "zip_archive_common.h"
44#include "zip_archive_private.h"
45
46using android::base::get_unaligned;
47
48// This is for windows. If we don't open a file in binary mode, weird
49// things will happen.
50#ifndef O_BINARY
51#define O_BINARY 0
52#endif
53
// The maximum number of bytes to scan backwards for the EOCD start: the
// longest allowed archive comment (kMaxCommentLen) plus the size of the
// fixed EOCD record itself.
static const uint32_t kMaxEOCDSearch = kMaxCommentLen + sizeof(EocdRecord);
56
// Human-readable descriptions of the error codes declared in this file.
// The strings are listed in the same order as the codes: the entry at
// index N describes the code whose value is -N (index 1 is kIterationEnd,
// index 12 is kMmapFailed). Index 0 holds the message for codes that are
// not recognized.
// NOTE(review): the lookup code is not visible here; presumably the table
// is indexed with the negated error code -- confirm against the caller.
static const char* kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};
72
// One above the largest error code that has an entry in kErrorMessages.
// NOTE(review): presumably an exclusive upper bound used when validating a
// code before looking up its message -- confirm against the lookup code.
static const int32_t kErrorMessageUpperBound = 0;

// Returned by Next() once every matching entry has been visited.
static const int32_t kIterationEnd = -1;

// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information. This could
// be because the central directory & local file header did not agree, or
// if the actual uncompressed length or crc32 do not match their declared
// values.
static const int32_t kInconsistentInformation = -9;

// An invalid entry name was encountered.
static const int32_t kInvalidEntryName = -10;

// An I/O related system call (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

// We were not able to mmap the central directory or entry contents.
static const int32_t kMmapFailed = -12;

// One below the smallest error code that has an entry in kErrorMessages.
// NOTE(review): presumably an exclusive lower bound, mirroring
// kErrorMessageUpperBound -- confirm against the lookup code.
static const int32_t kErrorMessageLowerBound = -13;
118
119/*
120 * A Read-only Zip archive.
121 *
122 * We want "open" and "find entry by name" to be fast operations, and
123 * we want to use as little memory as possible.  We memory-map the zip
124 * central directory, and load a hash table with pointers to the filenames
125 * (which aren't null-terminated).  The other fields are at a fixed offset
126 * from the filename, so we don't need to extract those (but we do need
127 * to byte-read and endian-swap them every time we want them).
128 *
129 * It's possible that somebody has handed us a massive (~1GB) zip archive,
130 * so we can't expect to mmap the entire file.
131 *
132 * To speed comparisons when doing a lookup by name, we could make the mapping
133 * "private" (copy-on-write) and null-terminate the filenames after verifying
134 * the record structure.  However, this requires a private mapping of
135 * every page that the Central Directory touches.  Easier to tuck a copy
136 * of the string length into the hash table entry.
137 */
138
/*
 * Returns the smallest power of two that is greater than or equal to |val|.
 *
 * The value is decremented, its highest set bit is smeared into every lower
 * bit position, and the result is incremented again. An input of 0, or any
 * input above 2^31, wraps around and yields 0.
 *
 * Technique from http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t smeared = val - 1;
  for (uint32_t shift = 1; shift <= 16; shift <<= 1) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
155
156static uint32_t ComputeHash(const ZipString& name) {
157  uint32_t hash = 0;
158  uint16_t len = name.name_length;
159  const uint8_t* str = name.name;
160
161  while (len--) {
162    hash = hash * 31 + *str++;
163  }
164
165  return hash;
166}
167
168/*
169 * Convert a ZipEntry to a hash table index, verifying that it's in a
170 * valid range.
171 */
172static int64_t EntryToIndex(const ZipString* hash_table,
173                            const uint32_t hash_table_size,
174                            const ZipString& name) {
175  const uint32_t hash = ComputeHash(name);
176
177  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
178  uint32_t ent = hash & (hash_table_size - 1);
179  while (hash_table[ent].name != NULL) {
180    if (hash_table[ent] == name) {
181      return ent;
182    }
183
184    ent = (ent + 1) & (hash_table_size - 1);
185  }
186
187  ALOGV("Zip: Unable to find entry %.*s", name.name_length, name.name);
188  return kEntryNotFound;
189}
190
191/*
192 * Add a new entry to the hash table.
193 */
194static int32_t AddToHash(ZipString *hash_table, const uint64_t hash_table_size,
195                         const ZipString& name) {
196  const uint64_t hash = ComputeHash(name);
197  uint32_t ent = hash & (hash_table_size - 1);
198
199  /*
200   * We over-allocated the table, so we're guaranteed to find an empty slot.
201   * Further, we guarantee that the hashtable size is not 0.
202   */
203  while (hash_table[ent].name != NULL) {
204    if (hash_table[ent] == name) {
205      // We've found a duplicate entry. We don't accept it
206      ALOGW("Zip: Found duplicate entry %.*s", name.name_length, name.name);
207      return kDuplicateEntry;
208    }
209    ent = (ent + 1) & (hash_table_size - 1);
210  }
211
212  hash_table[ent].name = name.name;
213  hash_table[ent].name_length = name.name_length;
214  return 0;
215}
216
217static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
218                                    ZipArchive* archive, off64_t file_length,
219                                    off64_t read_amount, uint8_t* scan_buffer) {
220  const off64_t search_start = file_length - read_amount;
221
222  if (lseek64(fd, search_start, SEEK_SET) != search_start) {
223    ALOGW("Zip: seek %" PRId64 " failed: %s", static_cast<int64_t>(search_start),
224          strerror(errno));
225    return kIoError;
226  }
227  if (!android::base::ReadFully(fd, scan_buffer, static_cast<size_t>(read_amount))) {
228    ALOGW("Zip: read %" PRId64 " failed: %s", static_cast<int64_t>(read_amount),
229          strerror(errno));
230    return kIoError;
231  }
232
233  /*
234   * Scan backward for the EOCD magic.  In an archive without a trailing
235   * comment, we'll find it on the first try.  (We may want to consider
236   * doing an initial minimal read; if we don't find it, retry with a
237   * second read as above.)
238   */
239  int i = read_amount - sizeof(EocdRecord);
240  for (; i >= 0; i--) {
241    if (scan_buffer[i] == 0x50) {
242      uint32_t* sig_addr = reinterpret_cast<uint32_t*>(&scan_buffer[i]);
243      if (get_unaligned<uint32_t>(sig_addr) == EocdRecord::kSignature) {
244        ALOGV("+++ Found EOCD at buf+%d", i);
245        break;
246      }
247    }
248  }
249  if (i < 0) {
250    ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
251    return kInvalidFile;
252  }
253
254  const off64_t eocd_offset = search_start + i;
255  const EocdRecord* eocd = reinterpret_cast<const EocdRecord*>(scan_buffer + i);
256  /*
257   * Verify that there's no trailing space at the end of the central directory
258   * and its comment.
259   */
260  const off64_t calculated_length = eocd_offset + sizeof(EocdRecord)
261      + eocd->comment_length;
262  if (calculated_length != file_length) {
263    ALOGW("Zip: %" PRId64 " extraneous bytes at the end of the central directory",
264          static_cast<int64_t>(file_length - calculated_length));
265    return kInvalidFile;
266  }
267
268  /*
269   * Grab the CD offset and size, and the number of entries in the
270   * archive and verify that they look reasonable.
271   */
272  if (eocd->cd_start_offset + eocd->cd_size > eocd_offset) {
273    ALOGW("Zip: bad offsets (dir %" PRIu32 ", size %" PRIu32 ", eocd %" PRId64 ")",
274        eocd->cd_start_offset, eocd->cd_size, static_cast<int64_t>(eocd_offset));
275    return kInvalidOffset;
276  }
277  if (eocd->num_records == 0) {
278    ALOGW("Zip: empty archive?");
279    return kEmptyArchive;
280  }
281
282  ALOGV("+++ num_entries=%" PRIu32 " dir_size=%" PRIu32 " dir_offset=%" PRIu32,
283        eocd->num_records, eocd->cd_size, eocd->cd_start_offset);
284
285  /*
286   * It all looks good.  Create a mapping for the CD, and set the fields
287   * in archive.
288   */
289  if (!archive->directory_map.create(debug_file_name, fd,
290          static_cast<off64_t>(eocd->cd_start_offset),
291          static_cast<size_t>(eocd->cd_size), true /* read only */) ) {
292    return kMmapFailed;
293  }
294
295  archive->num_entries = eocd->num_records;
296  archive->directory_offset = eocd->cd_start_offset;
297
298  return 0;
299}
300
301/*
302 * Find the zip Central Directory and memory-map it.
303 *
304 * On success, returns 0 after populating fields from the EOCD area:
305 *   directory_offset
306 *   directory_map
307 *   num_entries
308 */
309static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
310                                   ZipArchive* archive) {
311
312  // Test file length. We use lseek64 to make sure the file
313  // is small enough to be a zip file (Its size must be less than
314  // 0xffffffff bytes).
315  off64_t file_length = lseek64(fd, 0, SEEK_END);
316  if (file_length == -1) {
317    ALOGV("Zip: lseek on fd %d failed", fd);
318    return kInvalidFile;
319  }
320
321  if (file_length > static_cast<off64_t>(0xffffffff)) {
322    ALOGV("Zip: zip file too long %" PRId64, static_cast<int64_t>(file_length));
323    return kInvalidFile;
324  }
325
326  if (file_length < static_cast<off64_t>(sizeof(EocdRecord))) {
327    ALOGV("Zip: length %" PRId64 " is too small to be zip", static_cast<int64_t>(file_length));
328    return kInvalidFile;
329  }
330
331  /*
332   * Perform the traditional EOCD snipe hunt.
333   *
334   * We're searching for the End of Central Directory magic number,
335   * which appears at the start of the EOCD block.  It's followed by
336   * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
337   * need to read the last part of the file into a buffer, dig through
338   * it to find the magic number, parse some values out, and use those
339   * to determine the extent of the CD.
340   *
341   * We start by pulling in the last part of the file.
342   */
343  off64_t read_amount = kMaxEOCDSearch;
344  if (file_length < read_amount) {
345    read_amount = file_length;
346  }
347
348  uint8_t* scan_buffer = reinterpret_cast<uint8_t*>(malloc(read_amount));
349  int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
350                                        file_length, read_amount, scan_buffer);
351
352  free(scan_buffer);
353  return result;
354}
355
356/*
357 * Parses the Zip archive's Central Directory.  Allocates and populates the
358 * hash table.
359 *
360 * Returns 0 on success.
361 */
362static int32_t ParseZipArchive(ZipArchive* archive) {
363  const uint8_t* const cd_ptr =
364      reinterpret_cast<const uint8_t*>(archive->directory_map.getDataPtr());
365  const size_t cd_length = archive->directory_map.getDataLength();
366  const uint16_t num_entries = archive->num_entries;
367
368  /*
369   * Create hash table.  We have a minimum 75% load factor, possibly as
370   * low as 50% after we round off to a power of 2.  There must be at
371   * least one unused entry to avoid an infinite loop during creation.
372   */
373  archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
374  archive->hash_table = reinterpret_cast<ZipString*>(calloc(archive->hash_table_size,
375      sizeof(ZipString)));
376
377  /*
378   * Walk through the central directory, adding entries to the hash
379   * table and verifying values.
380   */
381  const uint8_t* const cd_end = cd_ptr + cd_length;
382  const uint8_t* ptr = cd_ptr;
383  for (uint16_t i = 0; i < num_entries; i++) {
384    const CentralDirectoryRecord* cdr =
385        reinterpret_cast<const CentralDirectoryRecord*>(ptr);
386    if (cdr->record_signature != CentralDirectoryRecord::kSignature) {
387      ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i);
388      return -1;
389    }
390
391    if (ptr + sizeof(CentralDirectoryRecord) > cd_end) {
392      ALOGW("Zip: ran off the end (at %" PRIu16 ")", i);
393      return -1;
394    }
395
396    const off64_t local_header_offset = cdr->local_file_header_offset;
397    if (local_header_offset >= archive->directory_offset) {
398      ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16,
399          static_cast<int64_t>(local_header_offset), i);
400      return -1;
401    }
402
403    const uint16_t file_name_length = cdr->file_name_length;
404    const uint16_t extra_length = cdr->extra_field_length;
405    const uint16_t comment_length = cdr->comment_length;
406    const uint8_t* file_name = ptr + sizeof(CentralDirectoryRecord);
407
408    /* check that file name is valid UTF-8 and doesn't contain NUL (U+0000) characters */
409    if (!IsValidEntryName(file_name, file_name_length)) {
410      return -1;
411    }
412
413    /* add the CDE filename to the hash table */
414    ZipString entry_name;
415    entry_name.name = file_name;
416    entry_name.name_length = file_name_length;
417    const int add_result = AddToHash(archive->hash_table,
418        archive->hash_table_size, entry_name);
419    if (add_result != 0) {
420      ALOGW("Zip: Error adding entry to hash table %d", add_result);
421      return add_result;
422    }
423
424    ptr += sizeof(CentralDirectoryRecord) + file_name_length + extra_length + comment_length;
425    if ((ptr - cd_ptr) > static_cast<int64_t>(cd_length)) {
426      ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16,
427          ptr - cd_ptr, cd_length, i);
428      return -1;
429    }
430  }
431  ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries);
432
433  return 0;
434}
435
436static int32_t OpenArchiveInternal(ZipArchive* archive,
437                                   const char* debug_file_name) {
438  int32_t result = -1;
439  if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
440    return result;
441  }
442
443  if ((result = ParseZipArchive(archive))) {
444    return result;
445  }
446
447  return 0;
448}
449
450int32_t OpenArchiveFd(int fd, const char* debug_file_name,
451                      ZipArchiveHandle* handle, bool assume_ownership) {
452  ZipArchive* archive = new ZipArchive(fd, assume_ownership);
453  *handle = archive;
454  return OpenArchiveInternal(archive, debug_file_name);
455}
456
457int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
458  const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
459  ZipArchive* archive = new ZipArchive(fd, true);
460  *handle = archive;
461
462  if (fd < 0) {
463    ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
464    return kIoError;
465  }
466
467  return OpenArchiveInternal(archive, fileName);
468}
469
/*
 * Close a ZipArchive, closing the file and freeing the contents.
 *
 * |handle| is treated as a pointer to the ZipArchive object and deleted;
 * the actual cleanup is done by the ZipArchive destructor, which is not
 * defined in this part of the file. The handle must not be used afterwards.
 */
void CloseArchive(ZipArchiveHandle handle) {
  ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
  ALOGV("Closing archive %p", archive);
  delete archive;
}
478
479static int32_t UpdateEntryFromDataDescriptor(int fd,
480                                             ZipEntry *entry) {
481  uint8_t ddBuf[sizeof(DataDescriptor) + sizeof(DataDescriptor::kOptSignature)];
482  if (!android::base::ReadFully(fd, ddBuf, sizeof(ddBuf))) {
483    return kIoError;
484  }
485
486  const uint32_t ddSignature = *(reinterpret_cast<const uint32_t*>(ddBuf));
487  const uint16_t offset = (ddSignature == DataDescriptor::kOptSignature) ? 4 : 0;
488  const DataDescriptor* descriptor = reinterpret_cast<const DataDescriptor*>(ddBuf + offset);
489
490  entry->crc32 = descriptor->crc32;
491  entry->compressed_length = descriptor->compressed_size;
492  entry->uncompressed_length = descriptor->uncompressed_size;
493
494  return 0;
495}
496
// Reads exactly |len| bytes into |buf| from offset |off| of |fd|.
//
// Returns true only if all |len| bytes were read. Returns false if a read
// fails or if the end of the file is reached first. A request for zero bytes
// succeeds on every platform.
//
// On non-Windows platforms, callers are guaranteed that the |fd|
// offset is unchanged and there is no side effect to this call.
//
// On Windows platforms this is not thread-safe.
static inline bool ReadAtOffset(int fd, uint8_t* buf, size_t len, off64_t off) {
#if !defined(_WIN32)
  // pread64 returns -1 on error and may return fewer bytes than requested,
  // so its result cannot be used as a success flag directly. Keep reading
  // until the whole range has arrived.
  while (len > 0) {
    const ssize_t bytes_read = TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
    if (bytes_read <= 0) {
      // -1: the read failed; 0: end of file before |len| bytes were read.
      return false;
    }
    buf += bytes_read;
    off += bytes_read;
    len -= static_cast<size_t>(bytes_read);
  }
  return true;
#else
  if (lseek64(fd, off, SEEK_SET) != off) {
    ALOGW("Zip: failed seek to offset %" PRId64, static_cast<int64_t>(off));
    return false;
  }
  return android::base::ReadFully(fd, buf, len);
#endif
}
513
514static int32_t FindEntry(const ZipArchive* archive, const int ent,
515                         ZipEntry* data) {
516  const uint16_t nameLen = archive->hash_table[ent].name_length;
517
518  // Recover the start of the central directory entry from the filename
519  // pointer.  The filename is the first entry past the fixed-size data,
520  // so we can just subtract back from that.
521  const uint8_t* ptr = archive->hash_table[ent].name;
522  ptr -= sizeof(CentralDirectoryRecord);
523
524  // This is the base of our mmapped region, we have to sanity check that
525  // the name that's in the hash table is a pointer to a location within
526  // this mapped region.
527  const uint8_t* base_ptr = reinterpret_cast<const uint8_t*>(
528    archive->directory_map.getDataPtr());
529  if (ptr < base_ptr || ptr > base_ptr + archive->directory_map.getDataLength()) {
530    ALOGW("Zip: Invalid entry pointer");
531    return kInvalidOffset;
532  }
533
534  const CentralDirectoryRecord *cdr =
535      reinterpret_cast<const CentralDirectoryRecord*>(ptr);
536
537  // The offset of the start of the central directory in the zipfile.
538  // We keep this lying around so that we can sanity check all our lengths
539  // and our per-file structures.
540  const off64_t cd_offset = archive->directory_offset;
541
542  // Fill out the compression method, modification time, crc32
543  // and other interesting attributes from the central directory. These
544  // will later be compared against values from the local file header.
545  data->method = cdr->compression_method;
546  data->mod_time = cdr->last_mod_date << 16 | cdr->last_mod_time;
547  data->crc32 = cdr->crc32;
548  data->compressed_length = cdr->compressed_size;
549  data->uncompressed_length = cdr->uncompressed_size;
550
551  // Figure out the local header offset from the central directory. The
552  // actual file data will begin after the local header and the name /
553  // extra comments.
554  const off64_t local_header_offset = cdr->local_file_header_offset;
555  if (local_header_offset + static_cast<off64_t>(sizeof(LocalFileHeader)) >= cd_offset) {
556    ALOGW("Zip: bad local hdr offset in zip");
557    return kInvalidOffset;
558  }
559
560  uint8_t lfh_buf[sizeof(LocalFileHeader)];
561  if (!ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf), local_header_offset)) {
562    ALOGW("Zip: failed reading lfh name from offset %" PRId64,
563        static_cast<int64_t>(local_header_offset));
564    return kIoError;
565  }
566
567  const LocalFileHeader *lfh = reinterpret_cast<const LocalFileHeader*>(lfh_buf);
568
569  if (lfh->lfh_signature != LocalFileHeader::kSignature) {
570    ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64,
571        static_cast<int64_t>(local_header_offset));
572    return kInvalidOffset;
573  }
574
575  // Paranoia: Match the values specified in the local file header
576  // to those specified in the central directory.
577  if ((lfh->gpb_flags & kGPBDDFlagMask) == 0) {
578    data->has_data_descriptor = 0;
579    if (data->compressed_length != lfh->compressed_size
580        || data->uncompressed_length != lfh->uncompressed_size
581        || data->crc32 != lfh->crc32) {
582      ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32
583        ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}",
584        data->compressed_length, data->uncompressed_length, data->crc32,
585        lfh->compressed_size, lfh->uncompressed_size, lfh->crc32);
586      return kInconsistentInformation;
587    }
588  } else {
589    data->has_data_descriptor = 1;
590  }
591
592  // Check that the local file header name matches the declared
593  // name in the central directory.
594  if (lfh->file_name_length == nameLen) {
595    const off64_t name_offset = local_header_offset + sizeof(LocalFileHeader);
596    if (name_offset + lfh->file_name_length > cd_offset) {
597      ALOGW("Zip: Invalid declared length");
598      return kInvalidOffset;
599    }
600
601    uint8_t* name_buf = reinterpret_cast<uint8_t*>(malloc(nameLen));
602    if (!ReadAtOffset(archive->fd, name_buf, nameLen, name_offset)) {
603      ALOGW("Zip: failed reading lfh name from offset %" PRId64, static_cast<int64_t>(name_offset));
604      free(name_buf);
605      return kIoError;
606    }
607
608    if (memcmp(archive->hash_table[ent].name, name_buf, nameLen)) {
609      free(name_buf);
610      return kInconsistentInformation;
611    }
612
613    free(name_buf);
614  } else {
615    ALOGW("Zip: lfh name did not match central directory.");
616    return kInconsistentInformation;
617  }
618
619  const off64_t data_offset = local_header_offset + sizeof(LocalFileHeader)
620      + lfh->file_name_length + lfh->extra_field_length;
621  if (data_offset > cd_offset) {
622    ALOGW("Zip: bad data offset %" PRId64 " in zip", static_cast<int64_t>(data_offset));
623    return kInvalidOffset;
624  }
625
626  if (static_cast<off64_t>(data_offset + data->compressed_length) > cd_offset) {
627    ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
628      static_cast<int64_t>(data_offset), data->compressed_length, static_cast<int64_t>(cd_offset));
629    return kInvalidOffset;
630  }
631
632  if (data->method == kCompressStored &&
633    static_cast<off64_t>(data_offset + data->uncompressed_length) > cd_offset) {
634     ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")",
635       static_cast<int64_t>(data_offset), data->uncompressed_length,
636       static_cast<int64_t>(cd_offset));
637     return kInvalidOffset;
638  }
639
640  data->offset = data_offset;
641  return 0;
642}
643
644struct IterationHandle {
645  uint32_t position;
646  // We're not using vector here because this code is used in the Windows SDK
647  // where the STL is not available.
648  ZipString prefix;
649  ZipString suffix;
650  ZipArchive* archive;
651
652  IterationHandle(const ZipString* in_prefix,
653                  const ZipString* in_suffix) {
654    if (in_prefix) {
655      uint8_t* name_copy = new uint8_t[in_prefix->name_length];
656      memcpy(name_copy, in_prefix->name, in_prefix->name_length);
657      prefix.name = name_copy;
658      prefix.name_length = in_prefix->name_length;
659    } else {
660      prefix.name = NULL;
661      prefix.name_length = 0;
662    }
663    if (in_suffix) {
664      uint8_t* name_copy = new uint8_t[in_suffix->name_length];
665      memcpy(name_copy, in_suffix->name, in_suffix->name_length);
666      suffix.name = name_copy;
667      suffix.name_length = in_suffix->name_length;
668    } else {
669      suffix.name = NULL;
670      suffix.name_length = 0;
671    }
672  }
673
674  ~IterationHandle() {
675    delete[] prefix.name;
676    delete[] suffix.name;
677  }
678};
679
680int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr,
681                       const ZipString* optional_prefix,
682                       const ZipString* optional_suffix) {
683  ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
684
685  if (archive == NULL || archive->hash_table == NULL) {
686    ALOGW("Zip: Invalid ZipArchiveHandle");
687    return kInvalidHandle;
688  }
689
690  IterationHandle* cookie = new IterationHandle(optional_prefix, optional_suffix);
691  cookie->position = 0;
692  cookie->archive = archive;
693
694  *cookie_ptr = cookie ;
695  return 0;
696}
697
// Ends an iteration by deleting the IterationHandle behind |cookie|, which
// also releases the handle's copies of the prefix and suffix names. The
// cookie must not be used afterwards.
void EndIteration(void* cookie) {
  delete reinterpret_cast<IterationHandle*>(cookie);
}
701
702int32_t FindEntry(const ZipArchiveHandle handle, const ZipString& entryName,
703                  ZipEntry* data) {
704  const ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
705  if (entryName.name_length == 0) {
706    ALOGW("Zip: Invalid filename %.*s", entryName.name_length, entryName.name);
707    return kInvalidEntryName;
708  }
709
710  const int64_t ent = EntryToIndex(archive->hash_table,
711    archive->hash_table_size, entryName);
712
713  if (ent < 0) {
714    ALOGV("Zip: Could not find entry %.*s", entryName.name_length, entryName.name);
715    return ent;
716  }
717
718  return FindEntry(archive, ent, data);
719}
720
721int32_t Next(void* cookie, ZipEntry* data, ZipString* name) {
722  IterationHandle* handle = reinterpret_cast<IterationHandle*>(cookie);
723  if (handle == NULL) {
724    return kInvalidHandle;
725  }
726
727  ZipArchive* archive = handle->archive;
728  if (archive == NULL || archive->hash_table == NULL) {
729    ALOGW("Zip: Invalid ZipArchiveHandle");
730    return kInvalidHandle;
731  }
732
733  const uint32_t currentOffset = handle->position;
734  const uint32_t hash_table_length = archive->hash_table_size;
735  const ZipString* hash_table = archive->hash_table;
736
737  for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
738    if (hash_table[i].name != NULL &&
739        (handle->prefix.name_length == 0 ||
740         hash_table[i].StartsWith(handle->prefix)) &&
741        (handle->suffix.name_length == 0 ||
742         hash_table[i].EndsWith(handle->suffix))) {
743      handle->position = (i + 1);
744      const int error = FindEntry(archive, i, data);
745      if (!error) {
746        name->name = hash_table[i].name;
747        name->name_length = hash_table[i].name_length;
748      }
749
750      return error;
751    }
752  }
753
754  handle->position = 0;
755  return kIterationEnd;
756}
757
758class Writer {
759 public:
760  virtual bool Append(uint8_t* buf, size_t buf_size) = 0;
761  virtual ~Writer() {}
762 protected:
763  Writer() = default;
764 private:
765  DISALLOW_COPY_AND_ASSIGN(Writer);
766};
767
768// A Writer that writes data to a fixed size memory region.
769// The size of the memory region must be equal to the total size of
770// the data appended to it.
771class MemoryWriter : public Writer {
772 public:
773  MemoryWriter(uint8_t* buf, size_t size) : Writer(),
774      buf_(buf), size_(size), bytes_written_(0) {
775  }
776
777  virtual bool Append(uint8_t* buf, size_t buf_size) override {
778    if (bytes_written_ + buf_size > size_) {
779      ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
780            size_, bytes_written_ + buf_size);
781      return false;
782    }
783
784    memcpy(buf_ + bytes_written_, buf, buf_size);
785    bytes_written_ += buf_size;
786    return true;
787  }
788
789 private:
790  uint8_t* const buf_;
791  const size_t size_;
792  size_t bytes_written_;
793};
794
795// A Writer that appends data to a file |fd| at its current position.
796// The file will be truncated to the end of the written data.
797class FileWriter : public Writer {
798 public:
799
800  // Creates a FileWriter for |fd| and prepare to write |entry| to it,
801  // guaranteeing that the file descriptor is valid and that there's enough
802  // space on the volume to write out the entry completely and that the file
803  // is truncated to the correct length.
804  //
805  // Returns a valid FileWriter on success, |nullptr| if an error occurred.
806  static std::unique_ptr<FileWriter> Create(int fd, const ZipEntry* entry) {
807    const uint32_t declared_length = entry->uncompressed_length;
808    const off64_t current_offset = lseek64(fd, 0, SEEK_CUR);
809    if (current_offset == -1) {
810      ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, strerror(errno));
811      return nullptr;
812    }
813
814    int result = 0;
815#if defined(__linux__)
816    if (declared_length > 0) {
817      // Make sure we have enough space on the volume to extract the compressed
818      // entry. Note that the call to ftruncate below will change the file size but
819      // will not allocate space on disk and this call to fallocate will not
820      // change the file size.
821      // Note: fallocate is only supported by the following filesystems -
822      // btrfs, ext4, ocfs2, and xfs. Therefore fallocate might fail with
823      // EOPNOTSUPP error when issued in other filesystems.
824      // Hence, check for the return error code before concluding that the
825      // disk does not have enough space.
826      result = TEMP_FAILURE_RETRY(fallocate(fd, 0, current_offset, declared_length));
827      if (result == -1 && errno == ENOSPC) {
828        ALOGW("Zip: unable to allocate space for file to %" PRId64 ": %s",
829              static_cast<int64_t>(declared_length + current_offset), strerror(errno));
830        return std::unique_ptr<FileWriter>(nullptr);
831      }
832    }
833#endif  // __linux__
834
835    result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset));
836    if (result == -1) {
837      ALOGW("Zip: unable to truncate file to %" PRId64 ": %s",
838            static_cast<int64_t>(declared_length + current_offset), strerror(errno));
839      return std::unique_ptr<FileWriter>(nullptr);
840    }
841
842    return std::unique_ptr<FileWriter>(new FileWriter(fd, declared_length));
843  }
844
845  virtual bool Append(uint8_t* buf, size_t buf_size) override {
846    if (total_bytes_written_ + buf_size > declared_length_) {
847      ALOGW("Zip: Unexpected size " ZD " (declared) vs " ZD " (actual)",
848            declared_length_, total_bytes_written_ + buf_size);
849      return false;
850    }
851
852    const bool result = android::base::WriteFully(fd_, buf, buf_size);
853    if (result) {
854      total_bytes_written_ += buf_size;
855    } else {
856      ALOGW("Zip: unable to write " ZD " bytes to file; %s", buf_size, strerror(errno));
857    }
858
859    return result;
860  }
861 private:
862  FileWriter(const int fd, const size_t declared_length) :
863      Writer(),
864      fd_(fd),
865      declared_length_(declared_length),
866      total_bytes_written_(0) {
867  }
868
  // Descriptor that Append() writes to.
  const int fd_;
  // Upper bound on the total number of bytes Append() will accept.
  const size_t declared_length_;
  // Running count of the bytes Append() has successfully written.
  size_t total_bytes_written_;
872};
873
// inflateInit2 is a macro supplied by zlib whose expansion contains
// old-style (C) casts. Routing the call through this helper keeps the
// -Wold-style-cast suppression confined to this one function; the
// diagnostic state is restored immediately afterwards.
// Returns whatever inflateInit2 returns for |stream| and |window_bits|.
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wold-style-cast"
static inline int zlib_inflateInit2(z_stream* stream, int window_bits) {
  return inflateInit2(stream, window_bits);
}
#pragma GCC diagnostic pop
881
882static int32_t InflateEntryToWriter(int fd, const ZipEntry* entry,
883                                    Writer* writer, uint64_t* crc_out) {
884  const size_t kBufSize = 32768;
885  std::vector<uint8_t> read_buf(kBufSize);
886  std::vector<uint8_t> write_buf(kBufSize);
887  z_stream zstream;
888  int zerr;
889
890  /*
891   * Initialize the zlib stream struct.
892   */
893  memset(&zstream, 0, sizeof(zstream));
894  zstream.zalloc = Z_NULL;
895  zstream.zfree = Z_NULL;
896  zstream.opaque = Z_NULL;
897  zstream.next_in = NULL;
898  zstream.avail_in = 0;
899  zstream.next_out = &write_buf[0];
900  zstream.avail_out = kBufSize;
901  zstream.data_type = Z_UNKNOWN;
902
903  /*
904   * Use the undocumented "negative window bits" feature to tell zlib
905   * that there's no zlib header waiting for it.
906   */
907  zerr = zlib_inflateInit2(&zstream, -MAX_WBITS);
908  if (zerr != Z_OK) {
909    if (zerr == Z_VERSION_ERROR) {
910      ALOGE("Installed zlib is not compatible with linked version (%s)",
911        ZLIB_VERSION);
912    } else {
913      ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
914    }
915
916    return kZlibError;
917  }
918
919  auto zstream_deleter = [](z_stream* stream) {
920    inflateEnd(stream);  /* free up any allocated structures */
921  };
922
923  std::unique_ptr<z_stream, decltype(zstream_deleter)> zstream_guard(&zstream, zstream_deleter);
924
925  const uint32_t uncompressed_length = entry->uncompressed_length;
926
927  uint32_t compressed_length = entry->compressed_length;
928  do {
929    /* read as much as we can */
930    if (zstream.avail_in == 0) {
931      const size_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
932      if (!android::base::ReadFully(fd, read_buf.data(), getSize)) {
933        ALOGW("Zip: inflate read failed, getSize = %zu: %s", getSize, strerror(errno));
934        return kIoError;
935      }
936
937      compressed_length -= getSize;
938
939      zstream.next_in = &read_buf[0];
940      zstream.avail_in = getSize;
941    }
942
943    /* uncompress the data */
944    zerr = inflate(&zstream, Z_NO_FLUSH);
945    if (zerr != Z_OK && zerr != Z_STREAM_END) {
946      ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
947          zerr, zstream.next_in, zstream.avail_in,
948          zstream.next_out, zstream.avail_out);
949      return kZlibError;
950    }
951
952    /* write when we're full or when we're done */
953    if (zstream.avail_out == 0 ||
954      (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
955      const size_t write_size = zstream.next_out - &write_buf[0];
956      if (!writer->Append(&write_buf[0], write_size)) {
957        // The file might have declared a bogus length.
958        return kInconsistentInformation;
959      }
960
961      zstream.next_out = &write_buf[0];
962      zstream.avail_out = kBufSize;
963    }
964  } while (zerr == Z_OK);
965
966  assert(zerr == Z_STREAM_END);     /* other errors should've been caught */
967
968  // stream.adler holds the crc32 value for such streams.
969  *crc_out = zstream.adler;
970
971  if (zstream.total_out != uncompressed_length || compressed_length != 0) {
972    ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")",
973        zstream.total_out, uncompressed_length);
974    return kInconsistentInformation;
975  }
976
977  return 0;
978}
979
980static int32_t CopyEntryToWriter(int fd, const ZipEntry* entry, Writer* writer,
981                                 uint64_t *crc_out) {
982  static const uint32_t kBufSize = 32768;
983  std::vector<uint8_t> buf(kBufSize);
984
985  const uint32_t length = entry->uncompressed_length;
986  uint32_t count = 0;
987  uint64_t crc = 0;
988  while (count < length) {
989    uint32_t remaining = length - count;
990
991    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
992    // value.
993    const size_t block_size = (remaining > kBufSize) ? kBufSize : remaining;
994    if (!android::base::ReadFully(fd, buf.data(), block_size)) {
995      ALOGW("CopyFileToFile: copy read failed, block_size = %zu: %s", block_size, strerror(errno));
996      return kIoError;
997    }
998
999    if (!writer->Append(&buf[0], block_size)) {
1000      return kIoError;
1001    }
1002    crc = crc32(crc, &buf[0], block_size);
1003    count += block_size;
1004  }
1005
1006  *crc_out = crc;
1007
1008  return 0;
1009}
1010
1011int32_t ExtractToWriter(ZipArchiveHandle handle,
1012                        ZipEntry* entry, Writer* writer) {
1013  ZipArchive* archive = reinterpret_cast<ZipArchive*>(handle);
1014  const uint16_t method = entry->method;
1015  off64_t data_offset = entry->offset;
1016
1017  if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
1018    ALOGW("Zip: lseek to data at %" PRId64 " failed", static_cast<int64_t>(data_offset));
1019    return kIoError;
1020  }
1021
1022  // this should default to kUnknownCompressionMethod.
1023  int32_t return_value = -1;
1024  uint64_t crc = 0;
1025  if (method == kCompressStored) {
1026    return_value = CopyEntryToWriter(archive->fd, entry, writer, &crc);
1027  } else if (method == kCompressDeflated) {
1028    return_value = InflateEntryToWriter(archive->fd, entry, writer, &crc);
1029  }
1030
1031  if (!return_value && entry->has_data_descriptor) {
1032    return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1033    if (return_value) {
1034      return return_value;
1035    }
1036  }
1037
1038  // TODO: Fix this check by passing the right flags to inflate2 so that
1039  // it calculates the CRC for us.
1040  if (entry->crc32 != crc && false) {
1041    ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc);
1042    return kInconsistentInformation;
1043  }
1044
1045  return return_value;
1046}
1047
1048int32_t ExtractToMemory(ZipArchiveHandle handle, ZipEntry* entry,
1049                        uint8_t* begin, uint32_t size) {
1050  std::unique_ptr<Writer> writer(new MemoryWriter(begin, size));
1051  return ExtractToWriter(handle, entry, writer.get());
1052}
1053
1054int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1055                           ZipEntry* entry, int fd) {
1056  std::unique_ptr<Writer> writer(FileWriter::Create(fd, entry));
1057  if (writer.get() == nullptr) {
1058    return kIoError;
1059  }
1060
1061  return ExtractToWriter(handle, entry, writer.get());
1062}
1063
1064const char* ErrorCodeString(int32_t error_code) {
1065  if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1066    return kErrorMessages[error_code * -1];
1067  }
1068
1069  return kErrorMessages[0];
1070}
1071
1072int GetFileDescriptor(const ZipArchiveHandle handle) {
1073  return reinterpret_cast<ZipArchive*>(handle)->fd;
1074}
1075