// zip_archive.cc revision f6a196522ac823bef7eb06267e3c00ccdef1d298
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * Read-only access to Zip archives, with minimal heap allocation. 19 */ 20#include "ziparchive/zip_archive.h" 21 22#include <zlib.h> 23 24#include <assert.h> 25#include <errno.h> 26#include <limits.h> 27#include <log/log.h> 28#include <fcntl.h> 29#include <stdlib.h> 30#include <string.h> 31#include <sys/mman.h> 32#include <unistd.h> 33 34#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd 35 36// This is for windows. If we don't open a file in binary mode, weirds 37// things will happen. 38#ifndef O_BINARY 39#define O_BINARY 0 40#endif 41 42/* 43 * Zip file constants. 
44 */ 45static const uint32_t kEOCDSignature = 0x06054b50; 46static const uint32_t kEOCDLen = 2; 47static const uint32_t kEOCDNumEntries = 8; // offset to #of entries in file 48static const uint32_t kEOCDSize = 12; // size of the central directory 49static const uint32_t kEOCDFileOffset = 16; // offset to central directory 50 51static const uint32_t kMaxCommentLen = 65535; // longest possible in ushort 52static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen); 53 54static const uint32_t kLFHSignature = 0x04034b50; 55static const uint32_t kLFHLen = 30; // excluding variable-len fields 56static const uint32_t kLFHGPBFlags = 6; // general purpose bit flags 57static const uint32_t kLFHCRC = 14; // offset to CRC 58static const uint32_t kLFHCompLen = 18; // offset to compressed length 59static const uint32_t kLFHUncompLen = 22; // offset to uncompressed length 60static const uint32_t kLFHNameLen = 26; // offset to filename length 61static const uint32_t kLFHExtraLen = 28; // offset to extra length 62 63static const uint32_t kCDESignature = 0x02014b50; 64static const uint32_t kCDELen = 46; // excluding variable-len fields 65static const uint32_t kCDEMethod = 10; // offset to compression method 66static const uint32_t kCDEModWhen = 12; // offset to modification timestamp 67static const uint32_t kCDECRC = 16; // offset to entry CRC 68static const uint32_t kCDECompLen = 20; // offset to compressed length 69static const uint32_t kCDEUncompLen = 24; // offset to uncompressed length 70static const uint32_t kCDENameLen = 28; // offset to filename length 71static const uint32_t kCDEExtraLen = 30; // offset to extra length 72static const uint32_t kCDECommentLen = 32; // offset to comment length 73static const uint32_t kCDELocalOffset = 42; // offset to local hdr 74 75static const uint32_t kDDOptSignature = 0x08074b50; // *OPTIONAL* data descriptor signature 76static const uint32_t kDDSignatureLen = 4; 77static const uint32_t kDDLen = 12; 78static const uint32_t 
kDDMaxLen = 16; // max of 16 bytes with a signature, 12 bytes without 79static const uint32_t kDDCrc32 = 0; // offset to crc32 80static const uint32_t kDDCompLen = 4; // offset to compressed length 81static const uint32_t kDDUncompLen = 8; // offset to uncompressed length 82 83static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD 84 85static const uint32_t kMaxErrorLen = 1024; 86 87static const char* kErrorMessages[] = { 88 "Unknown return code.", 89 "Iteration ended", 90 "Zlib error", 91 "Invalid file", 92 "Invalid handle", 93 "Duplicate entries in archive", 94 "Empty archive", 95 "Entry not found", 96 "Invalid offset", 97 "Inconsistent information", 98 "Invalid entry name", 99 "I/O Error", 100}; 101 102static const int32_t kErrorMessageUpperBound = 0; 103 104static const int32_t kIterationEnd = -1; 105 106// We encountered a Zlib error when inflating a stream from this file. 107// Usually indicates file corruption. 108static const int32_t kZlibError = -2; 109 110// The input file cannot be processed as a zip archive. Usually because 111// it's too small, too large or does not have a valid signature. 112static const int32_t kInvalidFile = -3; 113 114// An invalid iteration / ziparchive handle was passed in as an input 115// argument. 116static const int32_t kInvalidHandle = -4; 117 118// The zip archive contained two (or possibly more) entries with the same 119// name. 120static const int32_t kDuplicateEntry = -5; 121 122// The zip archive contains no entries. 123static const int32_t kEmptyArchive = -6; 124 125// The specified entry was not found in the archive. 126static const int32_t kEntryNotFound = -7; 127 128// The zip archive contained an invalid local file header pointer. 129static const int32_t kInvalidOffset = -8; 130 131// The zip archive contained inconsistent entry information. 
This could 132// be because the central directory & local file header did not agree, or 133// if the actual uncompressed length or crc32 do not match their declared 134// values. 135static const int32_t kInconsistentInformation = -9; 136 137// An invalid entry name was encountered. 138static const int32_t kInvalidEntryName = -10; 139 140// An I/O related system call (read, lseek, ftruncate, map) failed. 141static const int32_t kIoError = -11; 142 143static const int32_t kErrorMessageLowerBound = -12; 144 145 146#ifdef PAGE_SHIFT 147#define SYSTEM_PAGE_SIZE (1 << PAGE_SHIFT) 148#else 149#define SYSTEM_PAGE_SIZE 4096 150#endif 151 152struct MemMapping { 153 uint8_t* addr; // Start of data 154 size_t length; // Length of data 155 156 uint8_t* base_address; // page-aligned base address 157 size_t base_length; // length of mapping 158}; 159 160/* 161 * A Read-only Zip archive. 162 * 163 * We want "open" and "find entry by name" to be fast operations, and 164 * we want to use as little memory as possible. We memory-map the zip 165 * central directory, and load a hash table with pointers to the filenames 166 * (which aren't null-terminated). The other fields are at a fixed offset 167 * from the filename, so we don't need to extract those (but we do need 168 * to byte-read and endian-swap them every time we want them). 169 * 170 * It's possible that somebody has handed us a massive (~1GB) zip archive, 171 * so we can't expect to mmap the entire file. 172 * 173 * To speed comparisons when doing a lookup by name, we could make the mapping 174 * "private" (copy-on-write) and null-terminate the filenames after verifying 175 * the record structure. However, this requires a private mapping of 176 * every page that the Central Directory touches. Easier to tuck a copy 177 * of the string length into the hash table entry. 
178 */ 179struct ZipArchive { 180 /* open Zip archive */ 181 int fd; 182 183 /* mapped central directory area */ 184 off64_t directory_offset; 185 MemMapping directory_map; 186 187 /* number of entries in the Zip archive */ 188 uint16_t num_entries; 189 190 /* 191 * We know how many entries are in the Zip archive, so we can have a 192 * fixed-size hash table. We define a load factor of 0.75 and overallocat 193 * so the maximum number entries can never be higher than 194 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. 195 */ 196 uint32_t hash_table_size; 197 ZipEntryName* hash_table; 198}; 199 200// Returns 0 on success and negative values on failure. 201static int32_t MapFileSegment(const int fd, const off64_t start, const size_t length, 202 const int prot, const int flags, MemMapping *mapping) { 203 /* adjust to be page-aligned */ 204 const int adjust = start % SYSTEM_PAGE_SIZE; 205 const off64_t actual_start = start - adjust; 206 const off64_t actual_length = length + adjust; 207 208 void* map_addr = mmap(NULL, actual_length, prot, flags, fd, actual_start); 209 if (map_addr == MAP_FAILED) { 210 ALOGW("mmap(%llx, R, FILE|SHARED, %d, %llx) failed: %s", 211 actual_length, fd, actual_start, strerror(errno)); 212 return kIoError; 213 } 214 215 mapping->base_address = (uint8_t*) map_addr; 216 mapping->base_length = actual_length; 217 mapping->addr = (uint8_t*) map_addr + adjust; 218 mapping->length = length; 219 220 ALOGV("mmap seg (st=%d ln=%d): b=%p bl=%d ad=%p ln=%d", 221 start, length, mapping->base_address, mapping->base_length, 222 mapping->addr, mapping->length); 223 224 return 0; 225} 226 227static void ReleaseMappedSegment(MemMapping* map) { 228 if (map->base_address == 0 || map->base_length == 0) { 229 return; 230 } 231 232 if (munmap(map->base_address, map->base_length) < 0) { 233 ALOGW("munmap(%p, %d) failed: %s", 234 map->base_address, map->base_length, strerror(errno)); 235 } else { 236 ALOGV("munmap(%p, %d) succeeded", 
map->base_address, map->base_length); 237 } 238} 239 240static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) { 241 static const uint32_t kBufSize = 32768; 242 uint8_t buf[kBufSize]; 243 244 uint32_t count = 0; 245 uint64_t crc = 0; 246 while (count < length) { 247 uint32_t remaining = length - count; 248 249 // Safe conversion because kBufSize is narrow enough for a 32 bit signed 250 // value. 251 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining; 252 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size)); 253 254 if (actual != get_size) { 255 ALOGW("CopyFileToFile: copy read failed (%d vs %zd)", 256 (int) actual, get_size); 257 return kIoError; 258 } 259 260 memcpy(begin + count, buf, get_size); 261 crc = crc32(crc, buf, get_size); 262 count += get_size; 263 } 264 265 *crc_out = crc; 266 267 return 0; 268} 269 270/* 271 * Round up to the next highest power of 2. 272 * 273 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 274 */ 275static uint32_t RoundUpPower2(uint32_t val) { 276 val--; 277 val |= val >> 1; 278 val |= val >> 2; 279 val |= val >> 4; 280 val |= val >> 8; 281 val |= val >> 16; 282 val++; 283 284 return val; 285} 286 287static uint32_t ComputeHash(const char* str, uint16_t len) { 288 uint32_t hash = 0; 289 290 while (len--) { 291 hash = hash * 31 + *str++; 292 } 293 294 return hash; 295} 296 297/* 298 * Convert a ZipEntry to a hash table index, verifying that it's in a 299 * valid range. 300 */ 301static int64_t EntryToIndex(const ZipEntryName* hash_table, 302 const uint32_t hash_table_size, 303 const char* name, uint16_t length) { 304 const uint32_t hash = ComputeHash(name, length); 305 306 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. 
307 uint32_t ent = hash & (hash_table_size - 1); 308 while (hash_table[ent].name != NULL) { 309 if (hash_table[ent].name_length == length && 310 memcmp(hash_table[ent].name, name, length) == 0) { 311 return ent; 312 } 313 314 ent = (ent + 1) & (hash_table_size - 1); 315 } 316 317 ALOGV("Zip: Unable to find entry %.*s", name_length, name); 318 return kEntryNotFound; 319} 320 321/* 322 * Add a new entry to the hash table. 323 */ 324static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size, 325 const char* name, uint16_t length) { 326 const uint64_t hash = ComputeHash(name, length); 327 uint32_t ent = hash & (hash_table_size - 1); 328 329 /* 330 * We over-allocated the table, so we're guaranteed to find an empty slot. 331 * Further, we guarantee that the hashtable size is not 0. 332 */ 333 while (hash_table[ent].name != NULL) { 334 if (hash_table[ent].name_length == length && 335 memcmp(hash_table[ent].name, name, length) == 0) { 336 // We've found a duplicate entry. We don't accept it 337 ALOGW("Zip: Found duplicate entry %.*s", length, name); 338 return kDuplicateEntry; 339 } 340 ent = (ent + 1) & (hash_table_size - 1); 341 } 342 343 hash_table[ent].name = name; 344 hash_table[ent].name_length = length; 345 return 0; 346} 347 348/* 349 * Get 2 little-endian bytes. 350 */ 351static uint16_t get2LE(const uint8_t* src) { 352 return src[0] | (src[1] << 8); 353} 354 355/* 356 * Get 4 little-endian bytes. 
357 */ 358static uint32_t get4LE(const uint8_t* src) { 359 uint32_t result; 360 361 result = src[0]; 362 result |= src[1] << 8; 363 result |= src[2] << 16; 364 result |= src[3] << 24; 365 366 return result; 367} 368 369static int32_t MapCentralDirectory0(int fd, const char* debug_file_name, 370 ZipArchive* archive, off64_t file_length, 371 uint32_t read_amount, uint8_t* scan_buffer) { 372 const off64_t search_start = file_length - read_amount; 373 374 if (lseek64(fd, search_start, SEEK_SET) != search_start) { 375 ALOGW("Zip: seek %lld failed: %s", search_start, strerror(errno)); 376 return kIoError; 377 } 378 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount)); 379 if (actual != (ssize_t) read_amount) { 380 ALOGW("Zip: read %zd failed: %s", read_amount, strerror(errno)); 381 return kIoError; 382 } 383 384 /* 385 * Scan backward for the EOCD magic. In an archive without a trailing 386 * comment, we'll find it on the first try. (We may want to consider 387 * doing an initial minimal read; if we don't find it, retry with a 388 * second read as above.) 389 */ 390 int i; 391 for (i = read_amount - kEOCDLen; i >= 0; i--) { 392 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) { 393 ALOGV("+++ Found EOCD at buf+%d", i); 394 break; 395 } 396 } 397 if (i < 0) { 398 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name); 399 return kInvalidFile; 400 } 401 402 const off64_t eocd_offset = search_start + i; 403 const uint8_t* eocd_ptr = scan_buffer + i; 404 405 assert(eocd_offset < file_length); 406 407 /* 408 * Grab the CD offset and size, and the number of entries in the 409 * archive. Verify that they look reasonable. Widen dir_size and 410 * dir_offset to the file offset type. 
411 */ 412 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries); 413 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize); 414 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset); 415 416 if (dir_offset + dir_size > eocd_offset) { 417 ALOGW("Zip: bad offsets (dir %lld, size %lld, eocd %lld)", 418 dir_offset, dir_size, eocd_offset); 419 return kInvalidOffset; 420 } 421 if (num_entries == 0) { 422 ALOGW("Zip: empty archive?"); 423 return kEmptyArchive; 424 } 425 426 ALOGV("+++ num_entries=%d dir_size=%d dir_offset=%d", num_entries, dir_size, 427 dir_offset); 428 429 /* 430 * It all looks good. Create a mapping for the CD, and set the fields 431 * in archive. 432 */ 433 const int32_t result = MapFileSegment(fd, dir_offset, dir_size, 434 PROT_READ, MAP_FILE | MAP_SHARED, 435 &(archive->directory_map)); 436 if (result) { 437 return result; 438 } 439 440 archive->num_entries = num_entries; 441 archive->directory_offset = dir_offset; 442 443 return 0; 444} 445 446/* 447 * Find the zip Central Directory and memory-map it. 448 * 449 * On success, returns 0 after populating fields from the EOCD area: 450 * directory_offset 451 * directory_map 452 * num_entries 453 */ 454static int32_t MapCentralDirectory(int fd, const char* debug_file_name, 455 ZipArchive* archive) { 456 457 // Test file length. We use lseek64 to make sure the file 458 // is small enough to be a zip file (Its size must be less than 459 // 0xffffffff bytes). 460 off64_t file_length = lseek64(fd, 0, SEEK_END); 461 if (file_length == -1) { 462 ALOGV("Zip: lseek on fd %d failed", fd); 463 return kInvalidFile; 464 } 465 466 if (file_length > (off64_t) 0xffffffff) { 467 ALOGV("Zip: zip file too long %d", file_length); 468 return kInvalidFile; 469 } 470 471 if (file_length < (int64_t) kEOCDLen) { 472 ALOGV("Zip: length %ld is too small to be zip", file_length); 473 return kInvalidFile; 474 } 475 476 /* 477 * Perform the traditional EOCD snipe hunt. 
478 * 479 * We're searching for the End of Central Directory magic number, 480 * which appears at the start of the EOCD block. It's followed by 481 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We 482 * need to read the last part of the file into a buffer, dig through 483 * it to find the magic number, parse some values out, and use those 484 * to determine the extent of the CD. 485 * 486 * We start by pulling in the last part of the file. 487 */ 488 uint32_t read_amount = kMaxEOCDSearch; 489 if (file_length < (off64_t) read_amount) { 490 read_amount = file_length; 491 } 492 493 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount); 494 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive, 495 file_length, read_amount, scan_buffer); 496 497 free(scan_buffer); 498 return result; 499} 500 501/* 502 * Parses the Zip archive's Central Directory. Allocates and populates the 503 * hash table. 504 * 505 * Returns 0 on success. 506 */ 507static int32_t ParseZipArchive(ZipArchive* archive) { 508 int32_t result = -1; 509 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map.addr; 510 size_t cd_length = archive->directory_map.length; 511 uint16_t num_entries = archive->num_entries; 512 513 /* 514 * Create hash table. We have a minimum 75% load factor, possibly as 515 * low as 50% after we round off to a power of 2. There must be at 516 * least one unused entry to avoid an infinite loop during creation. 517 */ 518 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3); 519 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size, 520 sizeof(ZipEntryName)); 521 522 /* 523 * Walk through the central directory, adding entries to the hash 524 * table and verifying values. 
525 */ 526 const uint8_t* ptr = cd_ptr; 527 for (uint16_t i = 0; i < num_entries; i++) { 528 if (get4LE(ptr) != kCDESignature) { 529 ALOGW("Zip: missed a central dir sig (at %d)", i); 530 goto bail; 531 } 532 533 if (ptr + kCDELen > cd_ptr + cd_length) { 534 ALOGW("Zip: ran off the end (at %d)", i); 535 goto bail; 536 } 537 538 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 539 if (local_header_offset >= archive->directory_offset) { 540 ALOGW("Zip: bad LFH offset %lld at entry %d", local_header_offset, i); 541 goto bail; 542 } 543 544 const uint16_t file_name_length = get2LE(ptr + kCDENameLen); 545 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen); 546 const uint16_t comment_length = get2LE(ptr + kCDECommentLen); 547 548 /* add the CDE filename to the hash table */ 549 const int add_result = AddToHash(archive->hash_table, 550 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length); 551 if (add_result) { 552 ALOGW("Zip: Error adding entry to hash table %d", add_result); 553 result = add_result; 554 goto bail; 555 } 556 557 ptr += kCDELen + file_name_length + extra_length + comment_length; 558 if ((size_t)(ptr - cd_ptr) > cd_length) { 559 ALOGW("Zip: bad CD advance (%d vs %zd) at entry %d", 560 (int) (ptr - cd_ptr), cd_length, i); 561 goto bail; 562 } 563 } 564 ALOGV("+++ zip good scan %d entries", num_entries); 565 566 result = 0; 567 568bail: 569 return result; 570} 571 572static int32_t OpenArchiveInternal(ZipArchive* archive, 573 const char* debug_file_name) { 574 int32_t result = -1; 575 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) { 576 return result; 577 } 578 579 if ((result = ParseZipArchive(archive))) { 580 return result; 581 } 582 583 return 0; 584} 585 586int32_t OpenArchiveFd(int fd, const char* debug_file_name, 587 ZipArchiveHandle* handle) { 588 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 589 memset(archive, 0, sizeof(*archive)); 590 *handle = archive; 591 592 
archive->fd = fd; 593 594 return OpenArchiveInternal(archive, debug_file_name); 595} 596 597int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) { 598 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 599 memset(archive, 0, sizeof(*archive)); 600 *handle = archive; 601 602 const int fd = open(fileName, O_RDONLY | O_BINARY, 0); 603 if (fd < 0) { 604 ALOGW("Unable to open '%s': %s", fileName, strerror(errno)); 605 return kIoError; 606 } else { 607 archive->fd = fd; 608 } 609 610 return OpenArchiveInternal(archive, fileName); 611} 612 613/* 614 * Close a ZipArchive, closing the file and freeing the contents. 615 */ 616void CloseArchive(ZipArchiveHandle handle) { 617 ZipArchive* archive = (ZipArchive*) handle; 618 ALOGV("Closing archive %p", archive); 619 620 if (archive->fd >= 0) { 621 close(archive->fd); 622 } 623 624 ReleaseMappedSegment(&archive->directory_map); 625 free(archive->hash_table); 626 627 /* ensure nobody tries to use the ZipArchive after it's closed */ 628 archive->directory_offset = -1; 629 archive->fd = -1; 630 archive->num_entries = -1; 631 archive->hash_table_size = -1; 632 archive->hash_table = NULL; 633} 634 635static int32_t UpdateEntryFromDataDescriptor(int fd, 636 ZipEntry *entry) { 637 uint8_t ddBuf[kDDMaxLen]; 638 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf))); 639 if (actual != sizeof(ddBuf)) { 640 return kIoError; 641 } 642 643 const uint32_t ddSignature = get4LE(ddBuf); 644 uint16_t ddOffset = 0; 645 if (ddSignature == kDDOptSignature) { 646 ddOffset = 4; 647 } 648 649 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32); 650 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen); 651 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen); 652 653 return 0; 654} 655 656// Attempts to read |len| bytes into |buf| at offset |off|. 657// 658// This method uses pread64 on platforms that support it and 659// lseek64 + read on platforms that don't. 
This implies that 660// callers should not rely on the |fd| offset being incremented 661// as a side effect of this call. 662static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len, 663 off64_t off) { 664#ifdef HAVE_PREAD 665 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off)); 666#else 667 // The only supported platform that doesn't support pread at the moment 668 // is Windows. Only recent versions of windows support unix like forks, 669 // and even there the semantics are quite different. 670 if (lseek64(fd, off, SEEK_SET) != off) { 671 ALOGW("Zip: failed seek to offset %lld", name_offset); 672 return kIoError; 673 } 674 675 return TEMP_FAILURE_RETRY(read(fd, buf, len)); 676#endif // HAVE_PREAD 677} 678 679static int32_t FindEntry(const ZipArchive* archive, const int ent, 680 ZipEntry* data) { 681 const uint16_t nameLen = archive->hash_table[ent].name_length; 682 const char* name = archive->hash_table[ent].name; 683 684 // Recover the start of the central directory entry from the filename 685 // pointer. The filename is the first entry past the fixed-size data, 686 // so we can just subtract back from that. 687 const unsigned char* ptr = (const unsigned char*) name; 688 ptr -= kCDELen; 689 690 // This is the base of our mmapped region, we have to sanity check that 691 // the name that's in the hash table is a pointer to a location within 692 // this mapped region. 693 const unsigned char* base_ptr = (const unsigned char*) 694 archive->directory_map.addr; 695 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map.length) { 696 ALOGW("Zip: Invalid entry pointer"); 697 return kInvalidOffset; 698 } 699 700 // The offset of the start of the central directory in the zipfile. 701 // We keep this lying around so that we can sanity check all our lengths 702 // and our per-file structures. 
703 const off64_t cd_offset = archive->directory_offset; 704 705 // Fill out the compression method, modification time, crc32 706 // and other interesting attributes from the central directory. These 707 // will later be compared against values from the local file header. 708 data->method = get2LE(ptr + kCDEMethod); 709 data->mod_time = get4LE(ptr + kCDEModWhen); 710 data->crc32 = get4LE(ptr + kCDECRC); 711 data->compressed_length = get4LE(ptr + kCDECompLen); 712 data->uncompressed_length = get4LE(ptr + kCDEUncompLen); 713 714 // Figure out the local header offset from the central directory. The 715 // actual file data will begin after the local header and the name / 716 // extra comments. 717 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 718 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) { 719 ALOGW("Zip: bad local hdr offset in zip"); 720 return kInvalidOffset; 721 } 722 723 uint8_t lfh_buf[kLFHLen]; 724 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf), 725 local_header_offset); 726 if (actual != sizeof(lfh_buf)) { 727 ALOGW("Zip: failed reading lfh name from offset %lld", local_header_offset); 728 return kIoError; 729 } 730 731 if (get4LE(lfh_buf) != kLFHSignature) { 732 ALOGW("Zip: didn't find signature at start of lfh, offset=%lld", 733 local_header_offset); 734 return kInvalidOffset; 735 } 736 737 // Paranoia: Match the values specified in the local file header 738 // to those specified in the central directory. 
739 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags); 740 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen); 741 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen); 742 743 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) { 744 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC); 745 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen); 746 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen); 747 748 data->has_data_descriptor = 0; 749 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen 750 || data->crc32 != lfhCrc) { 751 ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}", 752 data->compressed_length, data->uncompressed_length, data->crc32, 753 lfhCompLen, lfhUncompLen, lfhCrc); 754 return kInconsistentInformation; 755 } 756 } else { 757 data->has_data_descriptor = 1; 758 } 759 760 // Check that the local file header name matches the declared 761 // name in the central directory. 762 if (lfhNameLen == nameLen) { 763 const off64_t name_offset = local_header_offset + kLFHLen; 764 if (name_offset + lfhNameLen >= cd_offset) { 765 ALOGW("Zip: Invalid declared length"); 766 return kInvalidOffset; 767 } 768 769 uint8_t* name_buf = (uint8_t*) malloc(nameLen); 770 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen, 771 name_offset); 772 773 if (actual != nameLen) { 774 ALOGW("Zip: failed reading lfh name from offset %lld", name_offset); 775 free(name_buf); 776 return kIoError; 777 } 778 779 if (memcmp(name, name_buf, nameLen)) { 780 free(name_buf); 781 return kInconsistentInformation; 782 } 783 784 free(name_buf); 785 } else { 786 ALOGW("Zip: lfh name did not match central directory."); 787 return kInconsistentInformation; 788 } 789 790 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen; 791 if (data_offset >= cd_offset) { 792 ALOGW("Zip: bad data offset %lld in zip", (off64_t) data_offset); 793 return kInvalidOffset; 794 } 795 796 if 
((off64_t)(data_offset + data->compressed_length) > cd_offset) { 797 ALOGW("Zip: bad compressed length in zip (%lld + %zd > %lld)", 798 data_offset, data->compressed_length, cd_offset); 799 return kInvalidOffset; 800 } 801 802 if (data->method == kCompressStored && 803 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) { 804 ALOGW("Zip: bad uncompressed length in zip (%lld + %zd > %lld)", 805 data_offset, data->uncompressed_length, cd_offset); 806 return kInvalidOffset; 807 } 808 809 data->offset = data_offset; 810 return 0; 811} 812 813struct IterationHandle { 814 uint32_t position; 815 const char* prefix; 816 uint16_t prefix_len; 817 ZipArchive* archive; 818}; 819 820int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) { 821 ZipArchive* archive = (ZipArchive *) handle; 822 823 if (archive == NULL || archive->hash_table == NULL) { 824 ALOGW("Zip: Invalid ZipArchiveHandle"); 825 return kInvalidHandle; 826 } 827 828 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle)); 829 cookie->position = 0; 830 cookie->prefix = prefix; 831 cookie->archive = archive; 832 if (prefix != NULL) { 833 cookie->prefix_len = strlen(prefix); 834 } 835 836 *cookie_ptr = cookie ; 837 return 0; 838} 839 840int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName, 841 ZipEntry* data) { 842 const ZipArchive* archive = (ZipArchive*) handle; 843 const int nameLen = strlen(entryName); 844 if (nameLen == 0 || nameLen > 65535) { 845 ALOGW("Zip: Invalid filename %s", entryName); 846 return kInvalidEntryName; 847 } 848 849 const int64_t ent = EntryToIndex(archive->hash_table, 850 archive->hash_table_size, entryName, nameLen); 851 852 if (ent < 0) { 853 ALOGD("Zip: Could not find entry %.*s", nameLen, entryName); 854 return ent; 855 } 856 857 return FindEntry(archive, ent, data); 858} 859 860int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) { 861 IterationHandle* handle = (IterationHandle *) cookie; 862 
if (handle == NULL) { 863 return kInvalidHandle; 864 } 865 866 ZipArchive* archive = handle->archive; 867 if (archive == NULL || archive->hash_table == NULL) { 868 ALOGW("Zip: Invalid ZipArchiveHandle"); 869 return kInvalidHandle; 870 } 871 872 const uint32_t currentOffset = handle->position; 873 const uint32_t hash_table_length = archive->hash_table_size; 874 const ZipEntryName *hash_table = archive->hash_table; 875 876 for (uint32_t i = currentOffset; i < hash_table_length; ++i) { 877 if (hash_table[i].name != NULL && 878 (handle->prefix == NULL || 879 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) { 880 handle->position = (i + 1); 881 const int error = FindEntry(archive, i, data); 882 if (!error) { 883 name->name = hash_table[i].name; 884 name->name_length = hash_table[i].name_length; 885 } 886 887 return error; 888 } 889 } 890 891 handle->position = 0; 892 return kIterationEnd; 893} 894 895static int32_t InflateToFile(int fd, const ZipEntry* entry, 896 uint8_t* begin, uint32_t length, 897 uint64_t* crc_out) { 898 int32_t result = -1; 899 const uint32_t kBufSize = 32768; 900 uint8_t read_buf[kBufSize]; 901 uint8_t write_buf[kBufSize]; 902 z_stream zstream; 903 int zerr; 904 905 /* 906 * Initialize the zlib stream struct. 907 */ 908 memset(&zstream, 0, sizeof(zstream)); 909 zstream.zalloc = Z_NULL; 910 zstream.zfree = Z_NULL; 911 zstream.opaque = Z_NULL; 912 zstream.next_in = NULL; 913 zstream.avail_in = 0; 914 zstream.next_out = (Bytef*) write_buf; 915 zstream.avail_out = kBufSize; 916 zstream.data_type = Z_UNKNOWN; 917 918 /* 919 * Use the undocumented "negative window bits" feature to tell zlib 920 * that there's no zlib header waiting for it. 
921 */ 922 zerr = inflateInit2(&zstream, -MAX_WBITS); 923 if (zerr != Z_OK) { 924 if (zerr == Z_VERSION_ERROR) { 925 ALOGE("Installed zlib is not compatible with linked version (%s)", 926 ZLIB_VERSION); 927 } else { 928 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr); 929 } 930 931 return kZlibError; 932 } 933 934 const uint32_t uncompressed_length = entry->uncompressed_length; 935 936 uint32_t compressed_length = entry->compressed_length; 937 uint32_t write_count = 0; 938 do { 939 /* read as much as we can */ 940 if (zstream.avail_in == 0) { 941 const ssize_t getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length; 942 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize)); 943 if (actual != getSize) { 944 ALOGW("Zip: inflate read failed (%d vs %zd)", actual, getSize); 945 result = kIoError; 946 goto z_bail; 947 } 948 949 compressed_length -= getSize; 950 951 zstream.next_in = read_buf; 952 zstream.avail_in = getSize; 953 } 954 955 /* uncompress the data */ 956 zerr = inflate(&zstream, Z_NO_FLUSH); 957 if (zerr != Z_OK && zerr != Z_STREAM_END) { 958 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", 959 zerr, zstream.next_in, zstream.avail_in, 960 zstream.next_out, zstream.avail_out); 961 result = kZlibError; 962 goto z_bail; 963 } 964 965 /* write when we're full or when we're done */ 966 if (zstream.avail_out == 0 || 967 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) { 968 const size_t write_size = zstream.next_out - write_buf; 969 // The file might have declared a bogus length. 970 if (write_size + write_count > length) { 971 goto z_bail; 972 } 973 memcpy(begin + write_count, write_buf, write_size); 974 write_count += write_size; 975 976 zstream.next_out = write_buf; 977 zstream.avail_out = kBufSize; 978 } 979 } while (zerr == Z_OK); 980 981 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 982 983 // stream.adler holds the crc32 value for such streams. 
984 *crc_out = zstream.adler; 985 986 if (zstream.total_out != uncompressed_length || compressed_length != 0) { 987 ALOGW("Zip: size mismatch on inflated file (%ld vs %zd)", 988 zstream.total_out, uncompressed_length); 989 result = kInconsistentInformation; 990 goto z_bail; 991 } 992 993 result = 0; 994 995z_bail: 996 inflateEnd(&zstream); /* free up any allocated structures */ 997 998 return result; 999} 1000 1001int32_t ExtractToMemory(ZipArchiveHandle handle, 1002 ZipEntry* entry, uint8_t* begin, uint32_t size) { 1003 ZipArchive* archive = (ZipArchive*) handle; 1004 const uint16_t method = entry->method; 1005 off64_t data_offset = entry->offset; 1006 1007 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) { 1008 ALOGW("Zip: lseek to data at %lld failed", (off64_t) data_offset); 1009 return kIoError; 1010 } 1011 1012 // this should default to kUnknownCompressionMethod. 1013 int32_t return_value = -1; 1014 uint64_t crc = 0; 1015 if (method == kCompressStored) { 1016 return_value = CopyFileToFile(archive->fd, begin, size, &crc); 1017 } else if (method == kCompressDeflated) { 1018 return_value = InflateToFile(archive->fd, entry, begin, size, &crc); 1019 } 1020 1021 if (!return_value && entry->has_data_descriptor) { 1022 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry); 1023 if (return_value) { 1024 return return_value; 1025 } 1026 } 1027 1028 // TODO: Fix this check by passing the right flags to inflate2 so that 1029 // it calculates the CRC for us. 
1030 if (entry->crc32 != crc && false) { 1031 ALOGW("Zip: crc mismatch: expected %u, was %llu", entry->crc32, crc); 1032 return kInconsistentInformation; 1033 } 1034 1035 return return_value; 1036} 1037 1038int32_t ExtractEntryToFile(ZipArchiveHandle handle, 1039 ZipEntry* entry, int fd) { 1040 const int32_t declared_length = entry->uncompressed_length; 1041 1042 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length)); 1043 if (result == -1) { 1044 ALOGW("Zip: unable to truncate file to %ud", declared_length); 1045 return kIoError; 1046 } 1047 1048 MemMapping mapping; 1049 int32_t error = MapFileSegment(fd, 0, declared_length, 1050 PROT_READ | PROT_WRITE, 1051 MAP_FILE | MAP_SHARED, 1052 &mapping); 1053 if (error) { 1054 return error; 1055 } 1056 1057 error = ExtractToMemory(handle, entry, mapping.addr, 1058 mapping.length); 1059 ReleaseMappedSegment(&mapping); 1060 return error; 1061} 1062 1063const char* ErrorCodeString(int32_t error_code) { 1064 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) { 1065 return kErrorMessages[error_code * -1]; 1066 } 1067 1068 return kErrorMessages[0]; 1069} 1070 1071int GetFileDescriptor(const ZipArchiveHandle handle) { 1072 return ((ZipArchive*) handle)->fd; 1073} 1074 1075