zip_archive.cc revision 56a90a08dbbbf96ef415dc6bc84bff2a409efc68
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * Read-only access to Zip archives, with minimal heap allocation. 19 */ 20 21#include <assert.h> 22#include <errno.h> 23#include <fcntl.h> 24#include <inttypes.h> 25#include <limits.h> 26#include <log/log.h> 27#include <stdlib.h> 28#include <string.h> 29#include <unistd.h> 30#include <utils/Compat.h> 31#include <utils/FileMap.h> 32#include <zlib.h> 33 34#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd 35 36#include "ziparchive/zip_archive.h" 37 38// This is for windows. If we don't open a file in binary mode, weirds 39// things will happen. 40#ifndef O_BINARY 41#define O_BINARY 0 42#endif 43 44/* 45 * Zip file constants. 46 */ 47static const uint32_t kEOCDSignature = 0x06054b50; 48static const uint32_t kEOCDLen = 2; 49static const uint32_t kEOCDNumEntries = 8; // offset to #of entries in file 50static const uint32_t kEOCDSize = 12; // size of the central directory 51static const uint32_t kEOCDFileOffset = 16; // offset to central directory 52 53static const uint32_t kMaxCommentLen = 65535; // longest possible in ushort 54static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen); 55 56static const uint32_t kLFHSignature = 0x04034b50; 57static const uint32_t kLFHLen = 30; // excluding variable-len fields 58static const uint32_t kLFHGPBFlags = 6; // general purpose bit flags 59static const uint32_t kLFHCRC = 14; // offset to CRC 60static const uint32_t kLFHCompLen = 18; // offset to compressed length 61static const uint32_t kLFHUncompLen = 22; // offset to uncompressed length 62static const uint32_t kLFHNameLen = 26; // offset to filename length 63static const uint32_t kLFHExtraLen = 28; // offset to extra length 64 65static const uint32_t kCDESignature = 0x02014b50; 66static const uint32_t kCDELen = 46; // excluding variable-len fields 67static const uint32_t kCDEMethod = 10; // offset to compression method 68static const uint32_t kCDEModWhen = 12; // offset to modification timestamp 69static const uint32_t kCDECRC = 16; // offset to entry CRC 70static const uint32_t kCDECompLen = 20; // offset to compressed length 71static const uint32_t kCDEUncompLen = 24; // offset to uncompressed length 72static const uint32_t kCDENameLen = 28; // offset to filename length 73static const uint32_t kCDEExtraLen = 30; // offset to extra length 74static const uint32_t kCDECommentLen = 32; // offset to comment length 75static const uint32_t kCDELocalOffset = 42; // offset to local hdr 76 77static const uint32_t kDDOptSignature = 0x08074b50; // *OPTIONAL* data descriptor signature 78static const uint32_t kDDSignatureLen = 4; 79static const uint32_t kDDLen = 12; 80static const uint32_t kDDMaxLen = 16; // max of 16 bytes with a signature, 12 bytes without 81static const uint32_t kDDCrc32 = 0; // offset to crc32 82static const uint32_t kDDCompLen = 4; // offset to compressed length 83static const uint32_t kDDUncompLen = 8; // offset to uncompressed length 84 85static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD 86 87static const uint32_t kMaxErrorLen = 1024; 88 89static const char* kErrorMessages[] = { 90 "Unknown return code.", 91 "Iteration ended", 92 "Zlib error", 93 "Invalid file", 94 "Invalid handle", 95 "Duplicate entries in archive", 96 "Empty archive", 97 "Entry not found", 98 "Invalid offset", 99 "Inconsistent information", 100 "Invalid entry name", 101 "I/O Error", 102 "File mapping failed" 103}; 104 105static const int32_t kErrorMessageUpperBound = 0; 106 107static const int32_t kIterationEnd = -1; 108 109// We encountered a Zlib error when inflating a stream from this file. 110// Usually indicates file corruption. 111static const int32_t kZlibError = -2; 112 113// The input file cannot be processed as a zip archive. Usually because 114// it's too small, too large or does not have a valid signature. 115static const int32_t kInvalidFile = -3; 116 117// An invalid iteration / ziparchive handle was passed in as an input 118// argument. 119static const int32_t kInvalidHandle = -4; 120 121// The zip archive contained two (or possibly more) entries with the same 122// name. 123static const int32_t kDuplicateEntry = -5; 124 125// The zip archive contains no entries. 126static const int32_t kEmptyArchive = -6; 127 128// The specified entry was not found in the archive. 129static const int32_t kEntryNotFound = -7; 130 131// The zip archive contained an invalid local file header pointer. 132static const int32_t kInvalidOffset = -8; 133 134// The zip archive contained inconsistent entry information. This could 135// be because the central directory & local file header did not agree, or 136// if the actual uncompressed length or crc32 do not match their declared 137// values. 138static const int32_t kInconsistentInformation = -9; 139 140// An invalid entry name was encountered. 141static const int32_t kInvalidEntryName = -10; 142 143// An I/O related system call (read, lseek, ftruncate, map) failed. 144static const int32_t kIoError = -11; 145 146// We were not able to mmap the central directory or entry contents. 147static const int32_t kMmapFailed = -12; 148 149static const int32_t kErrorMessageLowerBound = -13; 150 151static const char kTempMappingFileName[] = "zip: ExtractFileToFile"; 152 153/* 154 * A Read-only Zip archive. 155 * 156 * We want "open" and "find entry by name" to be fast operations, and 157 * we want to use as little memory as possible. We memory-map the zip 158 * central directory, and load a hash table with pointers to the filenames 159 * (which aren't null-terminated). The other fields are at a fixed offset 160 * from the filename, so we don't need to extract those (but we do need 161 * to byte-read and endian-swap them every time we want them). 162 * 163 * It's possible that somebody has handed us a massive (~1GB) zip archive, 164 * so we can't expect to mmap the entire file. 165 * 166 * To speed comparisons when doing a lookup by name, we could make the mapping 167 * "private" (copy-on-write) and null-terminate the filenames after verifying 168 * the record structure. However, this requires a private mapping of 169 * every page that the Central Directory touches. Easier to tuck a copy 170 * of the string length into the hash table entry. 171 */ 172struct ZipArchive { 173 /* open Zip archive */ 174 int fd; 175 176 /* mapped central directory area */ 177 off64_t directory_offset; 178 android::FileMap* directory_map; 179 180 /* number of entries in the Zip archive */ 181 uint16_t num_entries; 182 183 /* 184 * We know how many entries are in the Zip archive, so we can have a 185 * fixed-size hash table. We define a load factor of 0.75 and overallocat 186 * so the maximum number entries can never be higher than 187 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. 188 */ 189 uint32_t hash_table_size; 190 ZipEntryName* hash_table; 191}; 192 193// Returns 0 on success and negative values on failure. 194static android::FileMap* MapFileSegment(const int fd, const off64_t start, 195 const size_t length, const bool read_only, 196 const char* debug_file_name) { 197 android::FileMap* file_map = new android::FileMap; 198 const bool success = file_map->create(debug_file_name, fd, start, length, read_only); 199 if (!success) { 200 file_map->release(); 201 return NULL; 202 } 203 204 return file_map; 205} 206 207static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) { 208 static const uint32_t kBufSize = 32768; 209 uint8_t buf[kBufSize]; 210 211 uint32_t count = 0; 212 uint64_t crc = 0; 213 while (count < length) { 214 uint32_t remaining = length - count; 215 216 // Safe conversion because kBufSize is narrow enough for a 32 bit signed 217 // value. 218 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining; 219 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size)); 220 221 if (actual != get_size) { 222 ALOGW("CopyFileToFile: copy read failed (" ZD " vs " ZD ")", actual, get_size); 223 return kIoError; 224 } 225 226 memcpy(begin + count, buf, get_size); 227 crc = crc32(crc, buf, get_size); 228 count += get_size; 229 } 230 231 *crc_out = crc; 232 233 return 0; 234} 235 236/* 237 * Round up to the next highest power of 2. 238 * 239 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 240 */ 241static uint32_t RoundUpPower2(uint32_t val) { 242 val--; 243 val |= val >> 1; 244 val |= val >> 2; 245 val |= val >> 4; 246 val |= val >> 8; 247 val |= val >> 16; 248 val++; 249 250 return val; 251} 252 253static uint32_t ComputeHash(const char* str, uint16_t len) { 254 uint32_t hash = 0; 255 256 while (len--) { 257 hash = hash * 31 + *str++; 258 } 259 260 return hash; 261} 262 263/* 264 * Convert a ZipEntry to a hash table index, verifying that it's in a 265 * valid range. 266 */ 267static int64_t EntryToIndex(const ZipEntryName* hash_table, 268 const uint32_t hash_table_size, 269 const char* name, uint16_t length) { 270 const uint32_t hash = ComputeHash(name, length); 271 272 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. 273 uint32_t ent = hash & (hash_table_size - 1); 274 while (hash_table[ent].name != NULL) { 275 if (hash_table[ent].name_length == length && 276 memcmp(hash_table[ent].name, name, length) == 0) { 277 return ent; 278 } 279 280 ent = (ent + 1) & (hash_table_size - 1); 281 } 282 283 ALOGV("Zip: Unable to find entry %.*s", length, name); 284 return kEntryNotFound; 285} 286 287/* 288 * Add a new entry to the hash table. 289 */ 290static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size, 291 const char* name, uint16_t length) { 292 const uint64_t hash = ComputeHash(name, length); 293 uint32_t ent = hash & (hash_table_size - 1); 294 295 /* 296 * We over-allocated the table, so we're guaranteed to find an empty slot. 297 * Further, we guarantee that the hashtable size is not 0. 298 */ 299 while (hash_table[ent].name != NULL) { 300 if (hash_table[ent].name_length == length && 301 memcmp(hash_table[ent].name, name, length) == 0) { 302 // We've found a duplicate entry. We don't accept it 303 ALOGW("Zip: Found duplicate entry %.*s", length, name); 304 return kDuplicateEntry; 305 } 306 ent = (ent + 1) & (hash_table_size - 1); 307 } 308 309 hash_table[ent].name = name; 310 hash_table[ent].name_length = length; 311 return 0; 312} 313 314/* 315 * Get 2 little-endian bytes. 316 */ 317static uint16_t get2LE(const uint8_t* src) { 318 return src[0] | (src[1] << 8); 319} 320 321/* 322 * Get 4 little-endian bytes. 323 */ 324static uint32_t get4LE(const uint8_t* src) { 325 uint32_t result; 326 327 result = src[0]; 328 result |= src[1] << 8; 329 result |= src[2] << 16; 330 result |= src[3] << 24; 331 332 return result; 333} 334 335static int32_t MapCentralDirectory0(int fd, const char* debug_file_name, 336 ZipArchive* archive, off64_t file_length, 337 uint32_t read_amount, uint8_t* scan_buffer) { 338 const off64_t search_start = file_length - read_amount; 339 340 if (lseek64(fd, search_start, SEEK_SET) != search_start) { 341 ALOGW("Zip: seek %" PRId64 " failed: %s", search_start, strerror(errno)); 342 return kIoError; 343 } 344 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount)); 345 if (actual != (ssize_t) read_amount) { 346 ALOGW("Zip: read %" PRIu32 " failed: %s", read_amount, strerror(errno)); 347 return kIoError; 348 } 349 350 /* 351 * Scan backward for the EOCD magic. In an archive without a trailing 352 * comment, we'll find it on the first try. (We may want to consider 353 * doing an initial minimal read; if we don't find it, retry with a 354 * second read as above.) 355 */ 356 int i; 357 for (i = read_amount - kEOCDLen; i >= 0; i--) { 358 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) { 359 ALOGV("+++ Found EOCD at buf+%d", i); 360 break; 361 } 362 } 363 if (i < 0) { 364 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name); 365 return kInvalidFile; 366 } 367 368 const off64_t eocd_offset = search_start + i; 369 const uint8_t* eocd_ptr = scan_buffer + i; 370 371 assert(eocd_offset < file_length); 372 373 /* 374 * Grab the CD offset and size, and the number of entries in the 375 * archive. Verify that they look reasonable. Widen dir_size and 376 * dir_offset to the file offset type. 377 */ 378 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries); 379 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize); 380 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset); 381 382 if (dir_offset + dir_size > eocd_offset) { 383 ALOGW("Zip: bad offsets (dir %" PRId64 ", size %" PRId64 ", eocd %" PRId64 ")", 384 dir_offset, dir_size, eocd_offset); 385 return kInvalidOffset; 386 } 387 if (num_entries == 0) { 388 ALOGW("Zip: empty archive?"); 389 return kEmptyArchive; 390 } 391 392 ALOGV("+++ num_entries=%d dir_size=%" PRId64 " dir_offset=%" PRId64, 393 num_entries, dir_size, dir_offset); 394 395 /* 396 * It all looks good. Create a mapping for the CD, and set the fields 397 * in archive. 398 */ 399 android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size, 400 true /* read only */, debug_file_name); 401 if (map == NULL) { 402 archive->directory_map = NULL; 403 return kMmapFailed; 404 } 405 406 archive->directory_map = map; 407 archive->num_entries = num_entries; 408 archive->directory_offset = dir_offset; 409 410 return 0; 411} 412 413/* 414 * Find the zip Central Directory and memory-map it. 415 * 416 * On success, returns 0 after populating fields from the EOCD area: 417 * directory_offset 418 * directory_map 419 * num_entries 420 */ 421static int32_t MapCentralDirectory(int fd, const char* debug_file_name, 422 ZipArchive* archive) { 423 424 // Test file length. We use lseek64 to make sure the file 425 // is small enough to be a zip file (Its size must be less than 426 // 0xffffffff bytes). 427 off64_t file_length = lseek64(fd, 0, SEEK_END); 428 if (file_length == -1) { 429 ALOGV("Zip: lseek on fd %d failed", fd); 430 return kInvalidFile; 431 } 432 433 if (file_length > (off64_t) 0xffffffff) { 434 ALOGV("Zip: zip file too long %" PRId64, (int64_t)file_length); 435 return kInvalidFile; 436 } 437 438 if (file_length < (int64_t) kEOCDLen) { 439 ALOGV("Zip: length %" PRId64 " is too small to be zip", (int64_t)file_length); 440 return kInvalidFile; 441 } 442 443 /* 444 * Perform the traditional EOCD snipe hunt. 445 * 446 * We're searching for the End of Central Directory magic number, 447 * which appears at the start of the EOCD block. It's followed by 448 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We 449 * need to read the last part of the file into a buffer, dig through 450 * it to find the magic number, parse some values out, and use those 451 * to determine the extent of the CD. 452 * 453 * We start by pulling in the last part of the file. 454 */ 455 uint32_t read_amount = kMaxEOCDSearch; 456 if (file_length < (off64_t) read_amount) { 457 read_amount = file_length; 458 } 459 460 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount); 461 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive, 462 file_length, read_amount, scan_buffer); 463 464 free(scan_buffer); 465 return result; 466} 467 468/* 469 * Parses the Zip archive's Central Directory. Allocates and populates the 470 * hash table. 471 * 472 * Returns 0 on success. 473 */ 474static int32_t ParseZipArchive(ZipArchive* archive) { 475 int32_t result = -1; 476 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr(); 477 size_t cd_length = archive->directory_map->getDataLength(); 478 uint16_t num_entries = archive->num_entries; 479 480 /* 481 * Create hash table. We have a minimum 75% load factor, possibly as 482 * low as 50% after we round off to a power of 2. There must be at 483 * least one unused entry to avoid an infinite loop during creation. 484 */ 485 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3); 486 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size, 487 sizeof(ZipEntryName)); 488 489 /* 490 * Walk through the central directory, adding entries to the hash 491 * table and verifying values. 492 */ 493 const uint8_t* ptr = cd_ptr; 494 for (uint16_t i = 0; i < num_entries; i++) { 495 if (get4LE(ptr) != kCDESignature) { 496 ALOGW("Zip: missed a central dir sig (at %" PRIu16 ")", i); 497 goto bail; 498 } 499 500 if (ptr + kCDELen > cd_ptr + cd_length) { 501 ALOGW("Zip: ran off the end (at %" PRIu16 ")", i); 502 goto bail; 503 } 504 505 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 506 if (local_header_offset >= archive->directory_offset) { 507 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %" PRIu16, (int64_t)local_header_offset, i); 508 goto bail; 509 } 510 511 const uint16_t file_name_length = get2LE(ptr + kCDENameLen); 512 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen); 513 const uint16_t comment_length = get2LE(ptr + kCDECommentLen); 514 515 /* add the CDE filename to the hash table */ 516 const int add_result = AddToHash(archive->hash_table, 517 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length); 518 if (add_result) { 519 ALOGW("Zip: Error adding entry to hash table %d", add_result); 520 result = add_result; 521 goto bail; 522 } 523 524 ptr += kCDELen + file_name_length + extra_length + comment_length; 525 if ((size_t)(ptr - cd_ptr) > cd_length) { 526 ALOGW("Zip: bad CD advance (%tu vs %zu) at entry %" PRIu16, 527 ptr - cd_ptr, cd_length, i); 528 goto bail; 529 } 530 } 531 ALOGV("+++ zip good scan %" PRIu16 " entries", num_entries); 532 533 result = 0; 534 535bail: 536 return result; 537} 538 539static int32_t OpenArchiveInternal(ZipArchive* archive, 540 const char* debug_file_name) { 541 int32_t result = -1; 542 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) { 543 return result; 544 } 545 546 if ((result = ParseZipArchive(archive))) { 547 return result; 548 } 549 550 return 0; 551} 552 553int32_t OpenArchiveFd(int fd, const char* debug_file_name, 554 ZipArchiveHandle* handle) { 555 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 556 memset(archive, 0, sizeof(*archive)); 557 *handle = archive; 558 559 archive->fd = fd; 560 561 return OpenArchiveInternal(archive, debug_file_name); 562} 563 564int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) { 565 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 566 memset(archive, 0, sizeof(*archive)); 567 *handle = archive; 568 569 const int fd = open(fileName, O_RDONLY | O_BINARY, 0); 570 if (fd < 0) { 571 ALOGW("Unable to open '%s': %s", fileName, strerror(errno)); 572 return kIoError; 573 } else { 574 archive->fd = fd; 575 } 576 577 return OpenArchiveInternal(archive, fileName); 578} 579 580/* 581 * Close a ZipArchive, closing the file and freeing the contents. 582 */ 583void CloseArchive(ZipArchiveHandle handle) { 584 ZipArchive* archive = (ZipArchive*) handle; 585 ALOGV("Closing archive %p", archive); 586 587 if (archive->fd >= 0) { 588 close(archive->fd); 589 } 590 591 if (archive->directory_map != NULL) { 592 archive->directory_map->release(); 593 } 594 free(archive->hash_table); 595 free(archive); 596} 597 598static int32_t UpdateEntryFromDataDescriptor(int fd, 599 ZipEntry *entry) { 600 uint8_t ddBuf[kDDMaxLen]; 601 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf))); 602 if (actual != sizeof(ddBuf)) { 603 return kIoError; 604 } 605 606 const uint32_t ddSignature = get4LE(ddBuf); 607 uint16_t ddOffset = 0; 608 if (ddSignature == kDDOptSignature) { 609 ddOffset = 4; 610 } 611 612 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32); 613 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen); 614 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen); 615 616 return 0; 617} 618 619// Attempts to read |len| bytes into |buf| at offset |off|. 620// 621// This method uses pread64 on platforms that support it and 622// lseek64 + read on platforms that don't. This implies that 623// callers should not rely on the |fd| offset being incremented 624// as a side effect of this call. 625static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len, 626 off64_t off) { 627#ifdef HAVE_PREAD 628 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off)); 629#else 630 // The only supported platform that doesn't support pread at the moment 631 // is Windows. Only recent versions of windows support unix like forks, 632 // and even there the semantics are quite different. 633 if (lseek64(fd, off, SEEK_SET) != off) { 634 ALOGW("Zip: failed seek to offset %" PRId64, off); 635 return kIoError; 636 } 637 638 return TEMP_FAILURE_RETRY(read(fd, buf, len)); 639#endif // HAVE_PREAD 640} 641 642static int32_t FindEntry(const ZipArchive* archive, const int ent, 643 ZipEntry* data) { 644 const uint16_t nameLen = archive->hash_table[ent].name_length; 645 const char* name = archive->hash_table[ent].name; 646 647 // Recover the start of the central directory entry from the filename 648 // pointer. The filename is the first entry past the fixed-size data, 649 // so we can just subtract back from that. 650 const unsigned char* ptr = (const unsigned char*) name; 651 ptr -= kCDELen; 652 653 // This is the base of our mmapped region, we have to sanity check that 654 // the name that's in the hash table is a pointer to a location within 655 // this mapped region. 656 const unsigned char* base_ptr = (const unsigned char*) 657 archive->directory_map->getDataPtr(); 658 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) { 659 ALOGW("Zip: Invalid entry pointer"); 660 return kInvalidOffset; 661 } 662 663 // The offset of the start of the central directory in the zipfile. 664 // We keep this lying around so that we can sanity check all our lengths 665 // and our per-file structures. 666 const off64_t cd_offset = archive->directory_offset; 667 668 // Fill out the compression method, modification time, crc32 669 // and other interesting attributes from the central directory. These 670 // will later be compared against values from the local file header. 671 data->method = get2LE(ptr + kCDEMethod); 672 data->mod_time = get4LE(ptr + kCDEModWhen); 673 data->crc32 = get4LE(ptr + kCDECRC); 674 data->compressed_length = get4LE(ptr + kCDECompLen); 675 data->uncompressed_length = get4LE(ptr + kCDEUncompLen); 676 677 // Figure out the local header offset from the central directory. The 678 // actual file data will begin after the local header and the name / 679 // extra comments. 680 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 681 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) { 682 ALOGW("Zip: bad local hdr offset in zip"); 683 return kInvalidOffset; 684 } 685 686 uint8_t lfh_buf[kLFHLen]; 687 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf), 688 local_header_offset); 689 if (actual != sizeof(lfh_buf)) { 690 ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)local_header_offset); 691 return kIoError; 692 } 693 694 if (get4LE(lfh_buf) != kLFHSignature) { 695 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64, 696 (int64_t)local_header_offset); 697 return kInvalidOffset; 698 } 699 700 // Paranoia: Match the values specified in the local file header 701 // to those specified in the central directory. 702 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags); 703 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen); 704 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen); 705 706 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) { 707 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC); 708 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen); 709 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen); 710 711 data->has_data_descriptor = 0; 712 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen 713 || data->crc32 != lfhCrc) { 714 ALOGW("Zip: size/crc32 mismatch. expected {%" PRIu32 ", %" PRIu32 715 ", %" PRIx32 "}, was {%" PRIu32 ", %" PRIu32 ", %" PRIx32 "}", 716 data->compressed_length, data->uncompressed_length, data->crc32, 717 lfhCompLen, lfhUncompLen, lfhCrc); 718 return kInconsistentInformation; 719 } 720 } else { 721 data->has_data_descriptor = 1; 722 } 723 724 // Check that the local file header name matches the declared 725 // name in the central directory. 726 if (lfhNameLen == nameLen) { 727 const off64_t name_offset = local_header_offset + kLFHLen; 728 if (name_offset + lfhNameLen >= cd_offset) { 729 ALOGW("Zip: Invalid declared length"); 730 return kInvalidOffset; 731 } 732 733 uint8_t* name_buf = (uint8_t*) malloc(nameLen); 734 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen, 735 name_offset); 736 737 if (actual != nameLen) { 738 ALOGW("Zip: failed reading lfh name from offset %" PRId64, (int64_t)name_offset); 739 free(name_buf); 740 return kIoError; 741 } 742 743 if (memcmp(name, name_buf, nameLen)) { 744 free(name_buf); 745 return kInconsistentInformation; 746 } 747 748 free(name_buf); 749 } else { 750 ALOGW("Zip: lfh name did not match central directory."); 751 return kInconsistentInformation; 752 } 753 754 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen; 755 if (data_offset > cd_offset) { 756 ALOGW("Zip: bad data offset %" PRId64 " in zip", (int64_t)data_offset); 757 return kInvalidOffset; 758 } 759 760 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) { 761 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")", 762 (int64_t)data_offset, data->compressed_length, (int64_t)cd_offset); 763 return kInvalidOffset; 764 } 765 766 if (data->method == kCompressStored && 767 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) { 768 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %" PRIu32 " > %" PRId64 ")", 769 data_offset, data->uncompressed_length, cd_offset); 770 return kInvalidOffset; 771 } 772 773 data->offset = data_offset; 774 return 0; 775} 776 777struct IterationHandle { 778 uint32_t position; 779 const char* prefix; 780 uint16_t prefix_len; 781 ZipArchive* archive; 782}; 783 784int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) { 785 ZipArchive* archive = (ZipArchive *) handle; 786 787 if (archive == NULL || archive->hash_table == NULL) { 788 ALOGW("Zip: Invalid ZipArchiveHandle"); 789 return kInvalidHandle; 790 } 791 792 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle)); 793 cookie->position = 0; 794 cookie->prefix = prefix; 795 cookie->archive = archive; 796 if (prefix != NULL) { 797 cookie->prefix_len = strlen(prefix); 798 } 799 800 *cookie_ptr = cookie ; 801 return 0; 802} 803 804int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName, 805 ZipEntry* data) { 806 const ZipArchive* archive = (ZipArchive*) handle; 807 const int nameLen = strlen(entryName); 808 if (nameLen == 0 || nameLen > 65535) { 809 ALOGW("Zip: Invalid filename %s", entryName); 810 return kInvalidEntryName; 811 } 812 813 const int64_t ent = EntryToIndex(archive->hash_table, 814 archive->hash_table_size, entryName, nameLen); 815 816 if (ent < 0) { 817 ALOGV("Zip: Could not find entry %.*s", nameLen, entryName); 818 return ent; 819 } 820 821 return FindEntry(archive, ent, data); 822} 823 824int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) { 825 IterationHandle* handle = (IterationHandle *) cookie; 826 if (handle == NULL) { 827 return kInvalidHandle; 828 } 829 830 ZipArchive* archive = handle->archive; 831 if (archive == NULL || archive->hash_table == NULL) { 832 ALOGW("Zip: Invalid ZipArchiveHandle"); 833 return kInvalidHandle; 834 } 835 836 const uint32_t currentOffset = handle->position; 837 const uint32_t hash_table_length = archive->hash_table_size; 838 const ZipEntryName *hash_table = archive->hash_table; 839 840 for (uint32_t i = currentOffset; i < hash_table_length; ++i) { 841 if (hash_table[i].name != NULL && 842 (handle->prefix == NULL || 843 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) { 844 handle->position = (i + 1); 845 const int error = FindEntry(archive, i, data); 846 if (!error) { 847 name->name = hash_table[i].name; 848 name->name_length = hash_table[i].name_length; 849 } 850 851 return error; 852 } 853 } 854 855 handle->position = 0; 856 return kIterationEnd; 857} 858 859static int32_t InflateToFile(int fd, const ZipEntry* entry, 860 uint8_t* begin, uint32_t length, 861 uint64_t* crc_out) { 862 int32_t result = -1; 863 const uint32_t kBufSize = 32768; 864 uint8_t read_buf[kBufSize]; 865 uint8_t write_buf[kBufSize]; 866 z_stream zstream; 867 int zerr; 868 869 /* 870 * Initialize the zlib stream struct. 871 */ 872 memset(&zstream, 0, sizeof(zstream)); 873 zstream.zalloc = Z_NULL; 874 zstream.zfree = Z_NULL; 875 zstream.opaque = Z_NULL; 876 zstream.next_in = NULL; 877 zstream.avail_in = 0; 878 zstream.next_out = (Bytef*) write_buf; 879 zstream.avail_out = kBufSize; 880 zstream.data_type = Z_UNKNOWN; 881 882 /* 883 * Use the undocumented "negative window bits" feature to tell zlib 884 * that there's no zlib header waiting for it. 885 */ 886 zerr = inflateInit2(&zstream, -MAX_WBITS); 887 if (zerr != Z_OK) { 888 if (zerr == Z_VERSION_ERROR) { 889 ALOGE("Installed zlib is not compatible with linked version (%s)", 890 ZLIB_VERSION); 891 } else { 892 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr); 893 } 894 895 return kZlibError; 896 } 897 898 const uint32_t uncompressed_length = entry->uncompressed_length; 899 900 uint32_t compressed_length = entry->compressed_length; 901 uint32_t write_count = 0; 902 do { 903 /* read as much as we can */ 904 if (zstream.avail_in == 0) { 905 const ZD_TYPE getSize = (compressed_length > kBufSize) ? kBufSize : compressed_length; 906 const ZD_TYPE actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize)); 907 if (actual != getSize) { 908 ALOGW("Zip: inflate read failed (" ZD " vs " ZD ")", actual, getSize); 909 result = kIoError; 910 goto z_bail; 911 } 912 913 compressed_length -= getSize; 914 915 zstream.next_in = read_buf; 916 zstream.avail_in = getSize; 917 } 918 919 /* uncompress the data */ 920 zerr = inflate(&zstream, Z_NO_FLUSH); 921 if (zerr != Z_OK && zerr != Z_STREAM_END) { 922 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", 923 zerr, zstream.next_in, zstream.avail_in, 924 zstream.next_out, zstream.avail_out); 925 result = kZlibError; 926 goto z_bail; 927 } 928 929 /* write when we're full or when we're done */ 930 if (zstream.avail_out == 0 || 931 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) { 932 const size_t write_size = zstream.next_out - write_buf; 933 // The file might have declared a bogus length. 934 if (write_size + write_count > length) { 935 goto z_bail; 936 } 937 memcpy(begin + write_count, write_buf, write_size); 938 write_count += write_size; 939 940 zstream.next_out = write_buf; 941 zstream.avail_out = kBufSize; 942 } 943 } while (zerr == Z_OK); 944 945 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 946 947 // stream.adler holds the crc32 value for such streams. 948 *crc_out = zstream.adler; 949 950 if (zstream.total_out != uncompressed_length || compressed_length != 0) { 951 ALOGW("Zip: size mismatch on inflated file (%lu vs %" PRIu32 ")", 952 zstream.total_out, uncompressed_length); 953 result = kInconsistentInformation; 954 goto z_bail; 955 } 956 957 result = 0; 958 959z_bail: 960 inflateEnd(&zstream); /* free up any allocated structures */ 961 962 return result; 963} 964 965int32_t ExtractToMemory(ZipArchiveHandle handle, 966 ZipEntry* entry, uint8_t* begin, uint32_t size) { 967 ZipArchive* archive = (ZipArchive*) handle; 968 const uint16_t method = entry->method; 969 off64_t data_offset = entry->offset; 970 971 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) { 972 ALOGW("Zip: lseek to data at %" PRId64 " failed", (int64_t)data_offset); 973 return kIoError; 974 } 975 976 // this should default to kUnknownCompressionMethod. 977 int32_t return_value = -1; 978 uint64_t crc = 0; 979 if (method == kCompressStored) { 980 return_value = CopyFileToFile(archive->fd, begin, size, &crc); 981 } else if (method == kCompressDeflated) { 982 return_value = InflateToFile(archive->fd, entry, begin, size, &crc); 983 } 984 985 if (!return_value && entry->has_data_descriptor) { 986 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry); 987 if (return_value) { 988 return return_value; 989 } 990 } 991 992 // TODO: Fix this check by passing the right flags to inflate2 so that 993 // it calculates the CRC for us. 994 if (entry->crc32 != crc && false) { 995 ALOGW("Zip: crc mismatch: expected %" PRIu32 ", was %" PRIu64, entry->crc32, crc); 996 return kInconsistentInformation; 997 } 998 999 return return_value; 1000} 1001 1002int32_t ExtractEntryToFile(ZipArchiveHandle handle, 1003 ZipEntry* entry, int fd) { 1004 const int32_t declared_length = entry->uncompressed_length; 1005 1006 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR); 1007 if (current_offset == -1) { 1008 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, 1009 strerror(errno)); 1010 return kIoError; 1011 } 1012 1013 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset)); 1014 if (result == -1) { 1015 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s", 1016 (int64_t)(declared_length + current_offset), strerror(errno)); 1017 return kIoError; 1018 } 1019 1020 // Don't attempt to map a region of length 0. We still need the 1021 // ftruncate() though, since the API guarantees that we will truncate 1022 // the file to the end of the uncompressed output. 1023 if (declared_length == 0) { 1024 return 0; 1025 } 1026 1027 android::FileMap* map = MapFileSegment(fd, current_offset, declared_length, 1028 false, kTempMappingFileName); 1029 if (map == NULL) { 1030 return kMmapFailed; 1031 } 1032 1033 const int32_t error = ExtractToMemory(handle, entry, 1034 reinterpret_cast<uint8_t*>(map->getDataPtr()), 1035 map->getDataLength()); 1036 map->release(); 1037 return error; 1038} 1039 1040const char* ErrorCodeString(int32_t error_code) { 1041 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) { 1042 return kErrorMessages[error_code * -1]; 1043 } 1044 1045 return kErrorMessages[0]; 1046} 1047 1048int GetFileDescriptor(const ZipArchiveHandle handle) { 1049 return ((ZipArchive*) handle)->fd; 1050} 1051 1052