zip_archive.cc revision f6a196522ac823bef7eb06267e3c00ccdef1d298
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *    http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Read-only access to Zip archives, with minimal heap allocation.
19 */
20#include "ziparchive/zip_archive.h"
21
22#include <zlib.h>
23
24#include <assert.h>
25#include <errno.h>
26#include <limits.h>
27#include <log/log.h>
28#include <fcntl.h>
29#include <stdlib.h>
30#include <string.h>
31#include <sys/mman.h>
32#include <unistd.h>
33
34#include <JNIHelp.h>  // TEMP_FAILURE_RETRY may or may not be in unistd
35
// Windows only: a file that is not opened in binary mode gives weird
// results. Where O_BINARY is not defined we supply a zero value so the flag
// can be OR-ed into open() calls unconditionally.
#if !defined(O_BINARY)
#  define O_BINARY 0
#endif
41
/*
 * Zip file constants.
 *
 * Unless noted otherwise, values are byte offsets of a field relative to
 * the start of the record the constant's prefix names (EOCD = end of
 * central directory, LFH = local file header, CDE = central directory
 * entry, DD = data descriptor).
 */
static const uint32_t kEOCDSignature    = 0x06054b50;
// Length of the fixed part of the EOCD record (everything except the
// trailing archive comment): 4-byte signature, four 2-byte disk / entry
// counters, 4-byte directory size, 4-byte directory offset and the 2-byte
// comment length. This must be 22; a smaller value lets the EOCD scan read
// fields that lie beyond the end of its buffer.
static const uint32_t kEOCDLen          = 22;
static const uint32_t kEOCDNumEntries   = 8;              // offset to #of entries in file
static const uint32_t kEOCDSize         = 12;             // size of the central directory
static const uint32_t kEOCDFileOffset   = 16;             // offset to central directory

static const uint32_t kMaxCommentLen    = 65535;          // longest possible in ushort
static const uint32_t kMaxEOCDSearch    = (kMaxCommentLen + kEOCDLen);

static const uint32_t kLFHSignature     = 0x04034b50;
static const uint32_t kLFHLen           = 30;             // excluding variable-len fields
static const uint32_t kLFHGPBFlags      = 6;              // general purpose bit flags
static const uint32_t kLFHCRC           = 14;             // offset to CRC
static const uint32_t kLFHCompLen       = 18;             // offset to compressed length
static const uint32_t kLFHUncompLen     = 22;             // offset to uncompressed length
static const uint32_t kLFHNameLen       = 26;             // offset to filename length
static const uint32_t kLFHExtraLen      = 28;             // offset to extra length

static const uint32_t kCDESignature     = 0x02014b50;
static const uint32_t kCDELen           = 46;             // excluding variable-len fields
static const uint32_t kCDEMethod        = 10;             // offset to compression method
static const uint32_t kCDEModWhen       = 12;             // offset to modification timestamp
static const uint32_t kCDECRC           = 16;             // offset to entry CRC
static const uint32_t kCDECompLen       = 20;             // offset to compressed length
static const uint32_t kCDEUncompLen     = 24;             // offset to uncompressed length
static const uint32_t kCDENameLen       = 28;             // offset to filename length
static const uint32_t kCDEExtraLen      = 30;             // offset to extra length
static const uint32_t kCDECommentLen    = 32;             // offset to comment length
static const uint32_t kCDELocalOffset   = 42;             // offset to local hdr

static const uint32_t kDDOptSignature   = 0x08074b50;     // *OPTIONAL* data descriptor signature
static const uint32_t kDDSignatureLen   = 4;
static const uint32_t kDDLen            = 12;
static const uint32_t kDDMaxLen         = 16;             // max of 16 bytes with a signature, 12 bytes without
static const uint32_t kDDCrc32          = 0;              // offset to crc32
static const uint32_t kDDCompLen        = 4;              // offset to compressed length
static const uint32_t kDDUncompLen      = 8;              // offset to uncompressed length

static const uint32_t kGPBDDFlagMask    = 0x0008;         // mask value that signifies that the entry has a DD

static const uint32_t kMaxErrorLen = 1024;
86
// Descriptions of the return codes declared in this file, listed in the
// same order as the codes themselves so that the text at position i lines
// up with the code whose value is -i.
static const char* kErrorMessages[] = {
  "Unknown return code.",           //   0
  "Iteration ended",                //  -1
  "Zlib error",                     //  -2
  "Invalid file",                   //  -3
  "Invalid handle",                 //  -4
  "Duplicate entries in archive",   //  -5
  "Empty archive",                  //  -6
  "Entry not found",                //  -7
  "Invalid offset",                 //  -8
  "Inconsistent information",       //  -9
  "Invalid entry name",             // -10
  "I/O Error",                      // -11
};
101
// Return codes. Zero means success; every failure is a negative value that
// lies strictly between the two bounds below.
static const int32_t kErrorMessageUpperBound = 0;

static const int32_t kIterationEnd = -1;

// Inflating a stream from this file produced a Zlib error, which usually
// means the file is corrupt.
static const int32_t kZlibError = -2;

// The input cannot be processed as a zip archive, typically because it is
// too small, too large, or lacks a valid signature.
static const int32_t kInvalidFile = -3;

// The caller supplied an invalid iteration or ziparchive handle.
static const int32_t kInvalidHandle = -4;

// Two (or more) entries in the archive share the same name.
static const int32_t kDuplicateEntry = -5;

// The archive has no entries at all.
static const int32_t kEmptyArchive = -6;

// The requested entry does not exist in the archive.
static const int32_t kEntryNotFound = -7;

// The archive contains an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The archive's entry information is inconsistent: either the central
// directory and the local file header disagree, or the actual uncompressed
// length or crc32 differs from the declared value.
static const int32_t kInconsistentInformation = -9;

// An entry name was not valid.
static const int32_t kInvalidEntryName = -10;

// A system call doing I/O (read, lseek, ftruncate, map) failed.
static const int32_t kIoError = -11;

static const int32_t kErrorMessageLowerBound = -12;
144
145
// Page size used to align file mappings: computed from PAGE_SHIFT when that
// macro is defined, otherwise taken to be 4096 bytes.
#if defined(PAGE_SHIFT)
#  define SYSTEM_PAGE_SIZE (1 << PAGE_SHIFT)
#else
#  define SYSTEM_PAGE_SIZE 4096
#endif
151
// Bookkeeping for one mapped window onto a file. Two ranges are recorded:
// the bytes that were requested, and the page-aligned region that encloses
// them and is the one handed back to munmap().
struct MemMapping {
  // Requested range: first byte of the data and how many bytes follow.
  uint8_t* addr;
  size_t length;

  // Enclosing range: page-aligned start of the mapping and its full length.
  uint8_t* base_address;
  size_t base_length;
};
159
160/*
161 * A Read-only Zip archive.
162 *
163 * We want "open" and "find entry by name" to be fast operations, and
164 * we want to use as little memory as possible.  We memory-map the zip
165 * central directory, and load a hash table with pointers to the filenames
166 * (which aren't null-terminated).  The other fields are at a fixed offset
167 * from the filename, so we don't need to extract those (but we do need
168 * to byte-read and endian-swap them every time we want them).
169 *
170 * It's possible that somebody has handed us a massive (~1GB) zip archive,
171 * so we can't expect to mmap the entire file.
172 *
173 * To speed comparisons when doing a lookup by name, we could make the mapping
174 * "private" (copy-on-write) and null-terminate the filenames after verifying
175 * the record structure.  However, this requires a private mapping of
176 * every page that the Central Directory touches.  Easier to tuck a copy
177 * of the string length into the hash table entry.
178 */
179struct ZipArchive {
180  /* open Zip archive */
181  int fd;
182
183  /* mapped central directory area */
184  off64_t directory_offset;
185  MemMapping directory_map;
186
187  /* number of entries in the Zip archive */
188  uint16_t num_entries;
189
190  /*
191   * We know how many entries are in the Zip archive, so we can have a
192   * fixed-size hash table. We define a load factor of 0.75 and overallocat
193   * so the maximum number entries can never be higher than
194   * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t.
195   */
196  uint32_t hash_table_size;
197  ZipEntryName* hash_table;
198};
199
200// Returns 0 on success and negative values on failure.
201static int32_t MapFileSegment(const int fd, const off64_t start, const size_t length,
202                              const int prot, const int flags, MemMapping *mapping) {
203  /* adjust to be page-aligned */
204  const int adjust = start % SYSTEM_PAGE_SIZE;
205  const off64_t actual_start = start - adjust;
206  const off64_t actual_length = length + adjust;
207
208  void* map_addr = mmap(NULL, actual_length, prot, flags, fd, actual_start);
209  if (map_addr == MAP_FAILED) {
210    ALOGW("mmap(%llx, R, FILE|SHARED, %d, %llx) failed: %s",
211      actual_length, fd, actual_start, strerror(errno));
212    return kIoError;
213  }
214
215  mapping->base_address = (uint8_t*) map_addr;
216  mapping->base_length = actual_length;
217  mapping->addr = (uint8_t*) map_addr + adjust;
218  mapping->length = length;
219
220  ALOGV("mmap seg (st=%d ln=%d): b=%p bl=%d ad=%p ln=%d",
221      start, length, mapping->base_address, mapping->base_length,
222      mapping->addr, mapping->length);
223
224  return 0;
225}
226
227static void ReleaseMappedSegment(MemMapping* map) {
228  if (map->base_address == 0 || map->base_length == 0) {
229    return;
230  }
231
232  if (munmap(map->base_address, map->base_length) < 0) {
233    ALOGW("munmap(%p, %d) failed: %s",
234        map->base_address, map->base_length, strerror(errno));
235  } else {
236    ALOGV("munmap(%p, %d) succeeded", map->base_address, map->base_length);
237  }
238}
239
240static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) {
241  static const uint32_t kBufSize = 32768;
242  uint8_t buf[kBufSize];
243
244  uint32_t count = 0;
245  uint64_t crc = 0;
246  while (count < length) {
247    uint32_t remaining = length - count;
248
249    // Safe conversion because kBufSize is narrow enough for a 32 bit signed
250    // value.
251    ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining;
252    ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size));
253
254    if (actual != get_size) {
255      ALOGW("CopyFileToFile: copy read failed (%d vs %zd)",
256          (int) actual, get_size);
257      return kIoError;
258    }
259
260    memcpy(begin + count, buf, get_size);
261    crc = crc32(crc, buf, get_size);
262    count += get_size;
263  }
264
265  *crc_out = crc;
266
267  return 0;
268}
269
/*
 * Returns the smallest power of 2 that is >= val.
 *
 * Works by smearing the highest set bit of (val - 1) into every lower
 * position and then adding one. An input of 0, or one above 2^31, wraps
 * around to 0.
 *
 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
 */
static uint32_t RoundUpPower2(uint32_t val) {
  uint32_t bits = val - 1;

  // Shift distances 1, 2, 4, 8, 16 cover all 32 bit positions.
  for (unsigned int shift = 1; shift < 32; shift <<= 1) {
    bits |= bits >> shift;
  }

  return bits + 1;
}
286
// Hashes the first |len| characters of |str| (which need not be
// NUL-terminated) with the multiply-by-31 polynomial: each step computes
// hash = hash * 31 + character, starting from 0.
static uint32_t ComputeHash(const char* str, uint16_t len) {
  uint32_t hash = 0;

  for (uint16_t i = 0; i != len; ++i) {
    hash = hash * 31 + str[i];
  }

  return hash;
}
296
297/*
298 * Convert a ZipEntry to a hash table index, verifying that it's in a
299 * valid range.
300 */
301static int64_t EntryToIndex(const ZipEntryName* hash_table,
302                            const uint32_t hash_table_size,
303                            const char* name, uint16_t length) {
304  const uint32_t hash = ComputeHash(name, length);
305
306  // NOTE: (hash_table_size - 1) is guaranteed to be non-negative.
307  uint32_t ent = hash & (hash_table_size - 1);
308  while (hash_table[ent].name != NULL) {
309    if (hash_table[ent].name_length == length &&
310        memcmp(hash_table[ent].name, name, length) == 0) {
311      return ent;
312    }
313
314    ent = (ent + 1) & (hash_table_size - 1);
315  }
316
317  ALOGV("Zip: Unable to find entry %.*s", name_length, name);
318  return kEntryNotFound;
319}
320
321/*
322 * Add a new entry to the hash table.
323 */
324static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size,
325                         const char* name, uint16_t length) {
326  const uint64_t hash = ComputeHash(name, length);
327  uint32_t ent = hash & (hash_table_size - 1);
328
329  /*
330   * We over-allocated the table, so we're guaranteed to find an empty slot.
331   * Further, we guarantee that the hashtable size is not 0.
332   */
333  while (hash_table[ent].name != NULL) {
334    if (hash_table[ent].name_length == length &&
335        memcmp(hash_table[ent].name, name, length) == 0) {
336      // We've found a duplicate entry. We don't accept it
337      ALOGW("Zip: Found duplicate entry %.*s", length, name);
338      return kDuplicateEntry;
339    }
340    ent = (ent + 1) & (hash_table_size - 1);
341  }
342
343  hash_table[ent].name = name;
344  hash_table[ent].name_length = length;
345  return 0;
346}
347
/*
 * Assemble a 16-bit value from 2 bytes stored least-significant first.
 */
static uint16_t get2LE(const uint8_t* src) {
  const unsigned int low = src[0];
  const unsigned int high = src[1];

  return (uint16_t) (low | (high << 8));
}
354
/*
 * Assemble a 32-bit value from 4 bytes stored least-significant first.
 *
 * Each byte is widened to uint32_t before it is shifted. Without that, the
 * byte is promoted to a signed int and shifting a value >= 0x80 left by 24
 * would move a bit into the sign position.
 */
static uint32_t get4LE(const uint8_t* src) {
  return ((uint32_t) src[0])
      | ((uint32_t) src[1] << 8)
      | ((uint32_t) src[2] << 16)
      | ((uint32_t) src[3] << 24);
}
368
369static int32_t MapCentralDirectory0(int fd, const char* debug_file_name,
370                                    ZipArchive* archive, off64_t file_length,
371                                    uint32_t read_amount, uint8_t* scan_buffer) {
372  const off64_t search_start = file_length - read_amount;
373
374  if (lseek64(fd, search_start, SEEK_SET) != search_start) {
375    ALOGW("Zip: seek %lld failed: %s", search_start, strerror(errno));
376    return kIoError;
377  }
378  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount));
379  if (actual != (ssize_t) read_amount) {
380    ALOGW("Zip: read %zd failed: %s", read_amount, strerror(errno));
381    return kIoError;
382  }
383
384  /*
385   * Scan backward for the EOCD magic.  In an archive without a trailing
386   * comment, we'll find it on the first try.  (We may want to consider
387   * doing an initial minimal read; if we don't find it, retry with a
388   * second read as above.)
389   */
390  int i;
391  for (i = read_amount - kEOCDLen; i >= 0; i--) {
392    if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) {
393      ALOGV("+++ Found EOCD at buf+%d", i);
394      break;
395    }
396  }
397  if (i < 0) {
398    ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name);
399    return kInvalidFile;
400  }
401
402  const off64_t eocd_offset = search_start + i;
403  const uint8_t* eocd_ptr = scan_buffer + i;
404
405  assert(eocd_offset < file_length);
406
407  /*
408   * Grab the CD offset and size, and the number of entries in the
409   * archive.  Verify that they look reasonable. Widen dir_size and
410   * dir_offset to the file offset type.
411   */
412  const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries);
413  const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize);
414  const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset);
415
416  if (dir_offset + dir_size > eocd_offset) {
417    ALOGW("Zip: bad offsets (dir %lld, size %lld, eocd %lld)",
418        dir_offset, dir_size, eocd_offset);
419    return kInvalidOffset;
420  }
421  if (num_entries == 0) {
422    ALOGW("Zip: empty archive?");
423    return kEmptyArchive;
424  }
425
426  ALOGV("+++ num_entries=%d dir_size=%d dir_offset=%d", num_entries, dir_size,
427      dir_offset);
428
429  /*
430   * It all looks good.  Create a mapping for the CD, and set the fields
431   * in archive.
432   */
433  const int32_t result = MapFileSegment(fd, dir_offset, dir_size,
434                                        PROT_READ, MAP_FILE | MAP_SHARED,
435                                        &(archive->directory_map));
436  if (result) {
437    return result;
438  }
439
440  archive->num_entries = num_entries;
441  archive->directory_offset = dir_offset;
442
443  return 0;
444}
445
446/*
447 * Find the zip Central Directory and memory-map it.
448 *
449 * On success, returns 0 after populating fields from the EOCD area:
450 *   directory_offset
451 *   directory_map
452 *   num_entries
453 */
454static int32_t MapCentralDirectory(int fd, const char* debug_file_name,
455                                   ZipArchive* archive) {
456
457  // Test file length. We use lseek64 to make sure the file
458  // is small enough to be a zip file (Its size must be less than
459  // 0xffffffff bytes).
460  off64_t file_length = lseek64(fd, 0, SEEK_END);
461  if (file_length == -1) {
462    ALOGV("Zip: lseek on fd %d failed", fd);
463    return kInvalidFile;
464  }
465
466  if (file_length > (off64_t) 0xffffffff) {
467    ALOGV("Zip: zip file too long %d", file_length);
468    return kInvalidFile;
469  }
470
471  if (file_length < (int64_t) kEOCDLen) {
472    ALOGV("Zip: length %ld is too small to be zip", file_length);
473    return kInvalidFile;
474  }
475
476  /*
477   * Perform the traditional EOCD snipe hunt.
478   *
479   * We're searching for the End of Central Directory magic number,
480   * which appears at the start of the EOCD block.  It's followed by
481   * 18 bytes of EOCD stuff and up to 64KB of archive comment.  We
482   * need to read the last part of the file into a buffer, dig through
483   * it to find the magic number, parse some values out, and use those
484   * to determine the extent of the CD.
485   *
486   * We start by pulling in the last part of the file.
487   */
488  uint32_t read_amount = kMaxEOCDSearch;
489  if (file_length < (off64_t) read_amount) {
490    read_amount = file_length;
491  }
492
493  uint8_t* scan_buffer = (uint8_t*) malloc(read_amount);
494  int32_t result = MapCentralDirectory0(fd, debug_file_name, archive,
495                                        file_length, read_amount, scan_buffer);
496
497  free(scan_buffer);
498  return result;
499}
500
501/*
502 * Parses the Zip archive's Central Directory.  Allocates and populates the
503 * hash table.
504 *
505 * Returns 0 on success.
506 */
507static int32_t ParseZipArchive(ZipArchive* archive) {
508  int32_t result = -1;
509  const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map.addr;
510  size_t cd_length = archive->directory_map.length;
511  uint16_t num_entries = archive->num_entries;
512
513  /*
514   * Create hash table.  We have a minimum 75% load factor, possibly as
515   * low as 50% after we round off to a power of 2.  There must be at
516   * least one unused entry to avoid an infinite loop during creation.
517   */
518  archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3);
519  archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size,
520      sizeof(ZipEntryName));
521
522  /*
523   * Walk through the central directory, adding entries to the hash
524   * table and verifying values.
525   */
526  const uint8_t* ptr = cd_ptr;
527  for (uint16_t i = 0; i < num_entries; i++) {
528    if (get4LE(ptr) != kCDESignature) {
529      ALOGW("Zip: missed a central dir sig (at %d)", i);
530      goto bail;
531    }
532
533    if (ptr + kCDELen > cd_ptr + cd_length) {
534      ALOGW("Zip: ran off the end (at %d)", i);
535      goto bail;
536    }
537
538    const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
539    if (local_header_offset >= archive->directory_offset) {
540      ALOGW("Zip: bad LFH offset %lld at entry %d", local_header_offset, i);
541      goto bail;
542    }
543
544    const uint16_t file_name_length = get2LE(ptr + kCDENameLen);
545    const uint16_t extra_length = get2LE(ptr + kCDEExtraLen);
546    const uint16_t comment_length = get2LE(ptr + kCDECommentLen);
547
548    /* add the CDE filename to the hash table */
549    const int add_result = AddToHash(archive->hash_table,
550        archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length);
551    if (add_result) {
552      ALOGW("Zip: Error adding entry to hash table %d", add_result);
553      result = add_result;
554      goto bail;
555    }
556
557    ptr += kCDELen + file_name_length + extra_length + comment_length;
558    if ((size_t)(ptr - cd_ptr) > cd_length) {
559      ALOGW("Zip: bad CD advance (%d vs %zd) at entry %d",
560        (int) (ptr - cd_ptr), cd_length, i);
561      goto bail;
562    }
563  }
564  ALOGV("+++ zip good scan %d entries", num_entries);
565
566  result = 0;
567
568bail:
569  return result;
570}
571
572static int32_t OpenArchiveInternal(ZipArchive* archive,
573                                   const char* debug_file_name) {
574  int32_t result = -1;
575  if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) {
576    return result;
577  }
578
579  if ((result = ParseZipArchive(archive))) {
580    return result;
581  }
582
583  return 0;
584}
585
586int32_t OpenArchiveFd(int fd, const char* debug_file_name,
587                      ZipArchiveHandle* handle) {
588  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
589  memset(archive, 0, sizeof(*archive));
590  *handle = archive;
591
592  archive->fd = fd;
593
594  return OpenArchiveInternal(archive, debug_file_name);
595}
596
597int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) {
598  ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive));
599  memset(archive, 0, sizeof(*archive));
600  *handle = archive;
601
602  const int fd = open(fileName, O_RDONLY | O_BINARY, 0);
603  if (fd < 0) {
604    ALOGW("Unable to open '%s': %s", fileName, strerror(errno));
605    return kIoError;
606  } else {
607    archive->fd = fd;
608  }
609
610  return OpenArchiveInternal(archive, fileName);
611}
612
613/*
614 * Close a ZipArchive, closing the file and freeing the contents.
615 */
616void CloseArchive(ZipArchiveHandle handle) {
617  ZipArchive* archive = (ZipArchive*) handle;
618  ALOGV("Closing archive %p", archive);
619
620  if (archive->fd >= 0) {
621    close(archive->fd);
622  }
623
624  ReleaseMappedSegment(&archive->directory_map);
625  free(archive->hash_table);
626
627  /* ensure nobody tries to use the ZipArchive after it's closed */
628  archive->directory_offset = -1;
629  archive->fd = -1;
630  archive->num_entries = -1;
631  archive->hash_table_size = -1;
632  archive->hash_table = NULL;
633}
634
635static int32_t UpdateEntryFromDataDescriptor(int fd,
636                                             ZipEntry *entry) {
637  uint8_t ddBuf[kDDMaxLen];
638  ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf)));
639  if (actual != sizeof(ddBuf)) {
640    return kIoError;
641  }
642
643  const uint32_t ddSignature = get4LE(ddBuf);
644  uint16_t ddOffset = 0;
645  if (ddSignature == kDDOptSignature) {
646    ddOffset = 4;
647  }
648
649  entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32);
650  entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen);
651  entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen);
652
653  return 0;
654}
655
656// Attempts to read |len| bytes into |buf| at offset |off|.
657//
658// This method uses pread64 on platforms that support it and
659// lseek64 + read on platforms that don't. This implies that
660// callers should not rely on the |fd| offset being incremented
661// as a side effect of this call.
662static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len,
663                                   off64_t off) {
664#ifdef HAVE_PREAD
665  return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off));
666#else
667  // The only supported platform that doesn't support pread at the moment
668  // is Windows. Only recent versions of windows support unix like forks,
669  // and even there the semantics are quite different.
670  if (lseek64(fd, off, SEEK_SET) != off) {
671    ALOGW("Zip: failed seek to offset %lld", name_offset);
672    return kIoError;
673  }
674
675  return TEMP_FAILURE_RETRY(read(fd, buf, len));
676#endif  // HAVE_PREAD
677}
678
679static int32_t FindEntry(const ZipArchive* archive, const int ent,
680                         ZipEntry* data) {
681  const uint16_t nameLen = archive->hash_table[ent].name_length;
682  const char* name = archive->hash_table[ent].name;
683
684  // Recover the start of the central directory entry from the filename
685  // pointer.  The filename is the first entry past the fixed-size data,
686  // so we can just subtract back from that.
687  const unsigned char* ptr = (const unsigned char*) name;
688  ptr -= kCDELen;
689
690  // This is the base of our mmapped region, we have to sanity check that
691  // the name that's in the hash table is a pointer to a location within
692  // this mapped region.
693  const unsigned char* base_ptr = (const unsigned char*)
694    archive->directory_map.addr;
695  if (ptr < base_ptr || ptr > base_ptr + archive->directory_map.length) {
696    ALOGW("Zip: Invalid entry pointer");
697    return kInvalidOffset;
698  }
699
700  // The offset of the start of the central directory in the zipfile.
701  // We keep this lying around so that we can sanity check all our lengths
702  // and our per-file structures.
703  const off64_t cd_offset = archive->directory_offset;
704
705  // Fill out the compression method, modification time, crc32
706  // and other interesting attributes from the central directory. These
707  // will later be compared against values from the local file header.
708  data->method = get2LE(ptr + kCDEMethod);
709  data->mod_time = get4LE(ptr + kCDEModWhen);
710  data->crc32 = get4LE(ptr + kCDECRC);
711  data->compressed_length = get4LE(ptr + kCDECompLen);
712  data->uncompressed_length = get4LE(ptr + kCDEUncompLen);
713
714  // Figure out the local header offset from the central directory. The
715  // actual file data will begin after the local header and the name /
716  // extra comments.
717  const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset);
718  if (local_header_offset + (off64_t) kLFHLen >= cd_offset) {
719    ALOGW("Zip: bad local hdr offset in zip");
720    return kInvalidOffset;
721  }
722
723  uint8_t lfh_buf[kLFHLen];
724  ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf),
725                                 local_header_offset);
726  if (actual != sizeof(lfh_buf)) {
727    ALOGW("Zip: failed reading lfh name from offset %lld", local_header_offset);
728    return kIoError;
729  }
730
731  if (get4LE(lfh_buf) != kLFHSignature) {
732    ALOGW("Zip: didn't find signature at start of lfh, offset=%lld",
733        local_header_offset);
734    return kInvalidOffset;
735  }
736
737  // Paranoia: Match the values specified in the local file header
738  // to those specified in the central directory.
739  const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags);
740  const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen);
741  const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen);
742
743  if ((lfhGpbFlags & kGPBDDFlagMask) == 0) {
744    const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC);
745    const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen);
746    const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen);
747
748    data->has_data_descriptor = 0;
749    if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen
750        || data->crc32 != lfhCrc) {
751      ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}",
752        data->compressed_length, data->uncompressed_length, data->crc32,
753        lfhCompLen, lfhUncompLen, lfhCrc);
754      return kInconsistentInformation;
755    }
756  } else {
757    data->has_data_descriptor = 1;
758  }
759
760  // Check that the local file header name matches the declared
761  // name in the central directory.
762  if (lfhNameLen == nameLen) {
763    const off64_t name_offset = local_header_offset + kLFHLen;
764    if (name_offset + lfhNameLen >= cd_offset) {
765      ALOGW("Zip: Invalid declared length");
766      return kInvalidOffset;
767    }
768
769    uint8_t* name_buf = (uint8_t*) malloc(nameLen);
770    ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen,
771                                  name_offset);
772
773    if (actual != nameLen) {
774      ALOGW("Zip: failed reading lfh name from offset %lld", name_offset);
775      free(name_buf);
776      return kIoError;
777    }
778
779    if (memcmp(name, name_buf, nameLen)) {
780      free(name_buf);
781      return kInconsistentInformation;
782    }
783
784    free(name_buf);
785  } else {
786    ALOGW("Zip: lfh name did not match central directory.");
787    return kInconsistentInformation;
788  }
789
790  const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen;
791  if (data_offset >= cd_offset) {
792    ALOGW("Zip: bad data offset %lld in zip", (off64_t) data_offset);
793    return kInvalidOffset;
794  }
795
796  if ((off64_t)(data_offset + data->compressed_length) > cd_offset) {
797    ALOGW("Zip: bad compressed length in zip (%lld + %zd > %lld)",
798      data_offset, data->compressed_length, cd_offset);
799    return kInvalidOffset;
800  }
801
802  if (data->method == kCompressStored &&
803    (off64_t)(data_offset + data->uncompressed_length) > cd_offset) {
804     ALOGW("Zip: bad uncompressed length in zip (%lld + %zd > %lld)",
805       data_offset, data->uncompressed_length, cd_offset);
806     return kInvalidOffset;
807  }
808
809  data->offset = data_offset;
810  return 0;
811}
812
813struct IterationHandle {
814  uint32_t position;
815  const char* prefix;
816  uint16_t prefix_len;
817  ZipArchive* archive;
818};
819
820int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) {
821  ZipArchive* archive = (ZipArchive *) handle;
822
823  if (archive == NULL || archive->hash_table == NULL) {
824    ALOGW("Zip: Invalid ZipArchiveHandle");
825    return kInvalidHandle;
826  }
827
828  IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle));
829  cookie->position = 0;
830  cookie->prefix = prefix;
831  cookie->archive = archive;
832  if (prefix != NULL) {
833    cookie->prefix_len = strlen(prefix);
834  }
835
836  *cookie_ptr = cookie ;
837  return 0;
838}
839
840int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName,
841                  ZipEntry* data) {
842  const ZipArchive* archive = (ZipArchive*) handle;
843  const int nameLen = strlen(entryName);
844  if (nameLen == 0 || nameLen > 65535) {
845    ALOGW("Zip: Invalid filename %s", entryName);
846    return kInvalidEntryName;
847  }
848
849  const int64_t ent = EntryToIndex(archive->hash_table,
850    archive->hash_table_size, entryName, nameLen);
851
852  if (ent < 0) {
853    ALOGD("Zip: Could not find entry %.*s", nameLen, entryName);
854    return ent;
855  }
856
857  return FindEntry(archive, ent, data);
858}
859
860int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) {
861  IterationHandle* handle = (IterationHandle *) cookie;
862  if (handle == NULL) {
863    return kInvalidHandle;
864  }
865
866  ZipArchive* archive = handle->archive;
867  if (archive == NULL || archive->hash_table == NULL) {
868    ALOGW("Zip: Invalid ZipArchiveHandle");
869    return kInvalidHandle;
870  }
871
872  const uint32_t currentOffset = handle->position;
873  const uint32_t hash_table_length = archive->hash_table_size;
874  const ZipEntryName *hash_table = archive->hash_table;
875
876  for (uint32_t i = currentOffset; i < hash_table_length; ++i) {
877    if (hash_table[i].name != NULL &&
878        (handle->prefix == NULL ||
879         (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) {
880      handle->position = (i + 1);
881      const int error = FindEntry(archive, i, data);
882      if (!error) {
883        name->name = hash_table[i].name;
884        name->name_length = hash_table[i].name_length;
885      }
886
887      return error;
888    }
889  }
890
891  handle->position = 0;
892  return kIterationEnd;
893}
894
895static int32_t InflateToFile(int fd, const ZipEntry* entry,
896                             uint8_t* begin, uint32_t length,
897                             uint64_t* crc_out) {
898  int32_t result = -1;
899  const uint32_t kBufSize = 32768;
900  uint8_t read_buf[kBufSize];
901  uint8_t write_buf[kBufSize];
902  z_stream zstream;
903  int zerr;
904
905  /*
906   * Initialize the zlib stream struct.
907   */
908  memset(&zstream, 0, sizeof(zstream));
909  zstream.zalloc = Z_NULL;
910  zstream.zfree = Z_NULL;
911  zstream.opaque = Z_NULL;
912  zstream.next_in = NULL;
913  zstream.avail_in = 0;
914  zstream.next_out = (Bytef*) write_buf;
915  zstream.avail_out = kBufSize;
916  zstream.data_type = Z_UNKNOWN;
917
918  /*
919   * Use the undocumented "negative window bits" feature to tell zlib
920   * that there's no zlib header waiting for it.
921   */
922  zerr = inflateInit2(&zstream, -MAX_WBITS);
923  if (zerr != Z_OK) {
924    if (zerr == Z_VERSION_ERROR) {
925      ALOGE("Installed zlib is not compatible with linked version (%s)",
926        ZLIB_VERSION);
927    } else {
928      ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr);
929    }
930
931    return kZlibError;
932  }
933
934  const uint32_t uncompressed_length = entry->uncompressed_length;
935
936  uint32_t compressed_length = entry->compressed_length;
937  uint32_t write_count = 0;
938  do {
939    /* read as much as we can */
940    if (zstream.avail_in == 0) {
941      const ssize_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length;
942      const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize));
943      if (actual != getSize) {
944        ALOGW("Zip: inflate read failed (%d vs %zd)", actual, getSize);
945        result = kIoError;
946        goto z_bail;
947      }
948
949      compressed_length -= getSize;
950
951      zstream.next_in = read_buf;
952      zstream.avail_in = getSize;
953    }
954
955    /* uncompress the data */
956    zerr = inflate(&zstream, Z_NO_FLUSH);
957    if (zerr != Z_OK && zerr != Z_STREAM_END) {
958      ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)",
959          zerr, zstream.next_in, zstream.avail_in,
960          zstream.next_out, zstream.avail_out);
961      result = kZlibError;
962      goto z_bail;
963    }
964
965    /* write when we're full or when we're done */
966    if (zstream.avail_out == 0 ||
967      (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) {
968      const size_t write_size = zstream.next_out - write_buf;
969      // The file might have declared a bogus length.
970      if (write_size + write_count > length) {
971        goto z_bail;
972      }
973      memcpy(begin + write_count, write_buf, write_size);
974      write_count += write_size;
975
976      zstream.next_out = write_buf;
977      zstream.avail_out = kBufSize;
978    }
979  } while (zerr == Z_OK);
980
981  assert(zerr == Z_STREAM_END);     /* other errors should've been caught */
982
983  // stream.adler holds the crc32 value for such streams.
984  *crc_out = zstream.adler;
985
986  if (zstream.total_out != uncompressed_length || compressed_length != 0) {
987    ALOGW("Zip: size mismatch on inflated file (%ld vs %zd)",
988        zstream.total_out, uncompressed_length);
989    result = kInconsistentInformation;
990    goto z_bail;
991  }
992
993  result = 0;
994
995z_bail:
996  inflateEnd(&zstream);    /* free up any allocated structures */
997
998  return result;
999}
1000
1001int32_t ExtractToMemory(ZipArchiveHandle handle,
1002                        ZipEntry* entry, uint8_t* begin, uint32_t size) {
1003  ZipArchive* archive = (ZipArchive*) handle;
1004  const uint16_t method = entry->method;
1005  off64_t data_offset = entry->offset;
1006
1007  if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) {
1008    ALOGW("Zip: lseek to data at %lld failed", (off64_t) data_offset);
1009    return kIoError;
1010  }
1011
1012  // this should default to kUnknownCompressionMethod.
1013  int32_t return_value = -1;
1014  uint64_t crc = 0;
1015  if (method == kCompressStored) {
1016    return_value = CopyFileToFile(archive->fd, begin, size, &crc);
1017  } else if (method == kCompressDeflated) {
1018    return_value = InflateToFile(archive->fd, entry, begin, size, &crc);
1019  }
1020
1021  if (!return_value && entry->has_data_descriptor) {
1022    return_value = UpdateEntryFromDataDescriptor(archive->fd, entry);
1023    if (return_value) {
1024      return return_value;
1025    }
1026  }
1027
1028  // TODO: Fix this check by passing the right flags to inflate2 so that
1029  // it calculates the CRC for us.
1030  if (entry->crc32 != crc && false) {
1031    ALOGW("Zip: crc mismatch: expected %u, was %llu", entry->crc32, crc);
1032    return kInconsistentInformation;
1033  }
1034
1035  return return_value;
1036}
1037
1038int32_t ExtractEntryToFile(ZipArchiveHandle handle,
1039                           ZipEntry* entry, int fd) {
1040  const int32_t declared_length = entry->uncompressed_length;
1041
1042  int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length));
1043  if (result == -1) {
1044    ALOGW("Zip: unable to truncate file to %ud", declared_length);
1045    return kIoError;
1046  }
1047
1048  MemMapping mapping;
1049  int32_t error = MapFileSegment(fd, 0, declared_length,
1050                                 PROT_READ | PROT_WRITE,
1051                                 MAP_FILE | MAP_SHARED,
1052                                 &mapping);
1053  if (error) {
1054    return error;
1055  }
1056
1057  error = ExtractToMemory(handle, entry, mapping.addr,
1058                          mapping.length);
1059  ReleaseMappedSegment(&mapping);
1060  return error;
1061}
1062
1063const char* ErrorCodeString(int32_t error_code) {
1064  if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) {
1065    return kErrorMessages[error_code * -1];
1066  }
1067
1068  return kErrorMessages[0];
1069}
1070
1071int GetFileDescriptor(const ZipArchiveHandle handle) {
1072  return ((ZipArchive*) handle)->fd;
1073}
1074
1075