zip_archive.cc revision 99ef9914be1e39276e2e077670368927a1221921
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * Read-only access to Zip archives, with minimal heap allocation. 19 */ 20 21#include <assert.h> 22#include <errno.h> 23#include <fcntl.h> 24#include <inttypes.h> 25#include <limits.h> 26#include <log/log.h> 27#include <stdlib.h> 28#include <string.h> 29#include <unistd.h> 30#include <utils/FileMap.h> 31#include <zlib.h> 32 33#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd 34 35#include "ziparchive/zip_archive.h" 36 37// This is for windows. If we don't open a file in binary mode, weirds 38// things will happen. 39#ifndef O_BINARY 40#define O_BINARY 0 41#endif 42 43/* 44 * Zip file constants. 
/*
 * Zip file constants: record signatures, fixed record sizes and the byte
 * offsets of individual fields, counted from the start of their record.
 */
static const uint32_t kEOCDSignature = 0x06054b50;
// Size of the fixed part of the End Of Central Directory record: the 4 byte
// signature plus 18 bytes of fields.  Only the archive comment may follow it.
static const uint32_t kEOCDLen = 22;
static const uint32_t kEOCDNumEntries = 8;  // offset to #of entries in file
static const uint32_t kEOCDSize = 12;  // size of the central directory
static const uint32_t kEOCDFileOffset = 16;  // offset to central directory

static const uint32_t kMaxCommentLen = 65535;  // longest possible in ushort
// The EOCD signature can start at most this many bytes before the end of file.
static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen);

static const uint32_t kLFHSignature = 0x04034b50;
static const uint32_t kLFHLen = 30;  // excluding variable-len fields
static const uint32_t kLFHGPBFlags = 6;  // general purpose bit flags
static const uint32_t kLFHCRC = 14;  // offset to CRC
static const uint32_t kLFHCompLen = 18;  // offset to compressed length
static const uint32_t kLFHUncompLen = 22;  // offset to uncompressed length
static const uint32_t kLFHNameLen = 26;  // offset to filename length
static const uint32_t kLFHExtraLen = 28;  // offset to extra length

static const uint32_t kCDESignature = 0x02014b50;
static const uint32_t kCDELen = 46;  // excluding variable-len fields
static const uint32_t kCDEMethod = 10;  // offset to compression method
static const uint32_t kCDEModWhen = 12;  // offset to modification timestamp
static const uint32_t kCDECRC = 16;  // offset to entry CRC
static const uint32_t kCDECompLen = 20;  // offset to compressed length
static const uint32_t kCDEUncompLen = 24;  // offset to uncompressed length
static const uint32_t kCDENameLen = 28;  // offset to filename length
static const uint32_t kCDEExtraLen = 30;  // offset to extra length
static const uint32_t kCDECommentLen = 32;  // offset to comment length
static const uint32_t kCDELocalOffset = 42;  // offset to local hdr

static const uint32_t kDDOptSignature = 0x08074b50;  // *OPTIONAL* data descriptor signature
static const uint32_t kDDSignatureLen = 4;
static const uint32_t kDDLen = 12;
static const uint32_t kDDMaxLen = 16;  // max of 16 bytes with a signature, 12 bytes without
static const uint32_t kDDCrc32 = 0;  // offset to crc32
static const uint32_t kDDCompLen = 4;  // offset to compressed length
static const uint32_t kDDUncompLen = 8;  // offset to uncompressed length

static const uint32_t kGPBDDFlagMask = 0x0008;  // mask value that signifies that the entry has a DD

static const uint32_t kMaxErrorLen = 1024;

// Human readable descriptions of the error codes below.  The message for
// error code |e| lives at index |-e|; index 0 is the fallback for anything
// that is not a known error code.
static const char* kErrorMessages[] = {
  "Unknown return code.",
  "Iteration ended",
  "Zlib error",
  "Invalid file",
  "Invalid handle",
  "Duplicate entries in archive",
  "Empty archive",
  "Entry not found",
  "Invalid offset",
  "Inconsistent information",
  "Invalid entry name",
  "I/O Error",
  "File mapping failed"
};

// Error codes are the integers strictly between kErrorMessageLowerBound and
// kErrorMessageUpperBound.
static const int32_t kErrorMessageUpperBound = 0;

static const int32_t kIterationEnd = -1;

// We encountered a Zlib error when inflating a stream from this file.
// Usually indicates file corruption.
static const int32_t kZlibError = -2;

// The input file cannot be processed as a zip archive. Usually because
// it's too small, too large or does not have a valid signature.
static const int32_t kInvalidFile = -3;

// An invalid iteration / ziparchive handle was passed in as an input
// argument.
static const int32_t kInvalidHandle = -4;

// The zip archive contained two (or possibly more) entries with the same
// name.
static const int32_t kDuplicateEntry = -5;

// The zip archive contains no entries.
static const int32_t kEmptyArchive = -6;

// The specified entry was not found in the archive.
static const int32_t kEntryNotFound = -7;

// The zip archive contained an invalid local file header pointer.
static const int32_t kInvalidOffset = -8;

// The zip archive contained inconsistent entry information.
This could 134// be because the central directory & local file header did not agree, or 135// if the actual uncompressed length or crc32 do not match their declared 136// values. 137static const int32_t kInconsistentInformation = -9; 138 139// An invalid entry name was encountered. 140static const int32_t kInvalidEntryName = -10; 141 142// An I/O related system call (read, lseek, ftruncate, map) failed. 143static const int32_t kIoError = -11; 144 145// We were not able to mmap the central directory or entry contents. 146static const int32_t kMmapFailed = -12; 147 148static const int32_t kErrorMessageLowerBound = -13; 149 150static const char kTempMappingFileName[] = "zip: ExtractFileToFile"; 151 152/* 153 * A Read-only Zip archive. 154 * 155 * We want "open" and "find entry by name" to be fast operations, and 156 * we want to use as little memory as possible. We memory-map the zip 157 * central directory, and load a hash table with pointers to the filenames 158 * (which aren't null-terminated). The other fields are at a fixed offset 159 * from the filename, so we don't need to extract those (but we do need 160 * to byte-read and endian-swap them every time we want them). 161 * 162 * It's possible that somebody has handed us a massive (~1GB) zip archive, 163 * so we can't expect to mmap the entire file. 164 * 165 * To speed comparisons when doing a lookup by name, we could make the mapping 166 * "private" (copy-on-write) and null-terminate the filenames after verifying 167 * the record structure. However, this requires a private mapping of 168 * every page that the Central Directory touches. Easier to tuck a copy 169 * of the string length into the hash table entry. 
170 */ 171struct ZipArchive { 172 /* open Zip archive */ 173 int fd; 174 175 /* mapped central directory area */ 176 off64_t directory_offset; 177 android::FileMap* directory_map; 178 179 /* number of entries in the Zip archive */ 180 uint16_t num_entries; 181 182 /* 183 * We know how many entries are in the Zip archive, so we can have a 184 * fixed-size hash table. We define a load factor of 0.75 and overallocat 185 * so the maximum number entries can never be higher than 186 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. 187 */ 188 uint32_t hash_table_size; 189 ZipEntryName* hash_table; 190}; 191 192// Returns 0 on success and negative values on failure. 193static android::FileMap* MapFileSegment(const int fd, const off64_t start, 194 const size_t length, const bool read_only, 195 const char* debug_file_name) { 196 android::FileMap* file_map = new android::FileMap; 197 const bool success = file_map->create(debug_file_name, fd, start, length, read_only); 198 if (!success) { 199 file_map->release(); 200 return NULL; 201 } 202 203 return file_map; 204} 205 206static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) { 207 static const uint32_t kBufSize = 32768; 208 uint8_t buf[kBufSize]; 209 210 uint32_t count = 0; 211 uint64_t crc = 0; 212 while (count < length) { 213 uint32_t remaining = length - count; 214 215 // Safe conversion because kBufSize is narrow enough for a 32 bit signed 216 // value. 217 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining; 218 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size)); 219 220 if (actual != get_size) { 221 ALOGW("CopyFileToFile: copy read failed (%zd vs %zd)", actual, get_size); 222 return kIoError; 223 } 224 225 memcpy(begin + count, buf, get_size); 226 crc = crc32(crc, buf, get_size); 227 count += get_size; 228 } 229 230 *crc_out = crc; 231 232 return 0; 233} 234 235/* 236 * Round up to the next highest power of 2. 
237 * 238 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 239 */ 240static uint32_t RoundUpPower2(uint32_t val) { 241 val--; 242 val |= val >> 1; 243 val |= val >> 2; 244 val |= val >> 4; 245 val |= val >> 8; 246 val |= val >> 16; 247 val++; 248 249 return val; 250} 251 252static uint32_t ComputeHash(const char* str, uint16_t len) { 253 uint32_t hash = 0; 254 255 while (len--) { 256 hash = hash * 31 + *str++; 257 } 258 259 return hash; 260} 261 262/* 263 * Convert a ZipEntry to a hash table index, verifying that it's in a 264 * valid range. 265 */ 266static int64_t EntryToIndex(const ZipEntryName* hash_table, 267 const uint32_t hash_table_size, 268 const char* name, uint16_t length) { 269 const uint32_t hash = ComputeHash(name, length); 270 271 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. 272 uint32_t ent = hash & (hash_table_size - 1); 273 while (hash_table[ent].name != NULL) { 274 if (hash_table[ent].name_length == length && 275 memcmp(hash_table[ent].name, name, length) == 0) { 276 return ent; 277 } 278 279 ent = (ent + 1) & (hash_table_size - 1); 280 } 281 282 ALOGV("Zip: Unable to find entry %.*s", length, name); 283 return kEntryNotFound; 284} 285 286/* 287 * Add a new entry to the hash table. 288 */ 289static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size, 290 const char* name, uint16_t length) { 291 const uint64_t hash = ComputeHash(name, length); 292 uint32_t ent = hash & (hash_table_size - 1); 293 294 /* 295 * We over-allocated the table, so we're guaranteed to find an empty slot. 296 * Further, we guarantee that the hashtable size is not 0. 297 */ 298 while (hash_table[ent].name != NULL) { 299 if (hash_table[ent].name_length == length && 300 memcmp(hash_table[ent].name, name, length) == 0) { 301 // We've found a duplicate entry. 
We don't accept it 302 ALOGW("Zip: Found duplicate entry %.*s", length, name); 303 return kDuplicateEntry; 304 } 305 ent = (ent + 1) & (hash_table_size - 1); 306 } 307 308 hash_table[ent].name = name; 309 hash_table[ent].name_length = length; 310 return 0; 311} 312 313/* 314 * Get 2 little-endian bytes. 315 */ 316static uint16_t get2LE(const uint8_t* src) { 317 return src[0] | (src[1] << 8); 318} 319 320/* 321 * Get 4 little-endian bytes. 322 */ 323static uint32_t get4LE(const uint8_t* src) { 324 uint32_t result; 325 326 result = src[0]; 327 result |= src[1] << 8; 328 result |= src[2] << 16; 329 result |= src[3] << 24; 330 331 return result; 332} 333 334static int32_t MapCentralDirectory0(int fd, const char* debug_file_name, 335 ZipArchive* archive, off64_t file_length, 336 uint32_t read_amount, uint8_t* scan_buffer) { 337 const off64_t search_start = file_length - read_amount; 338 339 if (lseek64(fd, search_start, SEEK_SET) != search_start) { 340 ALOGW("Zip: seek %" PRId64 " failed: %s", search_start, strerror(errno)); 341 return kIoError; 342 } 343 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount)); 344 if (actual != (ssize_t) read_amount) { 345 ALOGW("Zip: read %u failed: %s", read_amount, strerror(errno)); 346 return kIoError; 347 } 348 349 /* 350 * Scan backward for the EOCD magic. In an archive without a trailing 351 * comment, we'll find it on the first try. (We may want to consider 352 * doing an initial minimal read; if we don't find it, retry with a 353 * second read as above.) 
354 */ 355 int i; 356 for (i = read_amount - kEOCDLen; i >= 0; i--) { 357 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) { 358 ALOGV("+++ Found EOCD at buf+%d", i); 359 break; 360 } 361 } 362 if (i < 0) { 363 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name); 364 return kInvalidFile; 365 } 366 367 const off64_t eocd_offset = search_start + i; 368 const uint8_t* eocd_ptr = scan_buffer + i; 369 370 assert(eocd_offset < file_length); 371 372 /* 373 * Grab the CD offset and size, and the number of entries in the 374 * archive. Verify that they look reasonable. Widen dir_size and 375 * dir_offset to the file offset type. 376 */ 377 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries); 378 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize); 379 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset); 380 381 if (dir_offset + dir_size > eocd_offset) { 382 ALOGW("Zip: bad offsets (dir %" PRId64 ", size %" PRId64 ", eocd %" PRId64 ")", 383 dir_offset, dir_size, eocd_offset); 384 return kInvalidOffset; 385 } 386 if (num_entries == 0) { 387 ALOGW("Zip: empty archive?"); 388 return kEmptyArchive; 389 } 390 391 ALOGV("+++ num_entries=%d dir_size=%" PRId64 " dir_offset=%" PRId64, 392 num_entries, dir_size, dir_offset); 393 394 /* 395 * It all looks good. Create a mapping for the CD, and set the fields 396 * in archive. 397 */ 398 android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size, 399 true /* read only */, debug_file_name); 400 if (map == NULL) { 401 archive->directory_map = NULL; 402 return kMmapFailed; 403 } 404 405 archive->directory_map = map; 406 archive->num_entries = num_entries; 407 archive->directory_offset = dir_offset; 408 409 return 0; 410} 411 412/* 413 * Find the zip Central Directory and memory-map it. 
414 * 415 * On success, returns 0 after populating fields from the EOCD area: 416 * directory_offset 417 * directory_map 418 * num_entries 419 */ 420static int32_t MapCentralDirectory(int fd, const char* debug_file_name, 421 ZipArchive* archive) { 422 423 // Test file length. We use lseek64 to make sure the file 424 // is small enough to be a zip file (Its size must be less than 425 // 0xffffffff bytes). 426 off64_t file_length = lseek64(fd, 0, SEEK_END); 427 if (file_length == -1) { 428 ALOGV("Zip: lseek on fd %d failed", fd); 429 return kInvalidFile; 430 } 431 432 if (file_length > (off64_t) 0xffffffff) { 433 ALOGV("Zip: zip file too long %" PRId64, file_length); 434 return kInvalidFile; 435 } 436 437 if (file_length < (int64_t) kEOCDLen) { 438 ALOGV("Zip: length %" PRId64 " is too small to be zip", file_length); 439 return kInvalidFile; 440 } 441 442 /* 443 * Perform the traditional EOCD snipe hunt. 444 * 445 * We're searching for the End of Central Directory magic number, 446 * which appears at the start of the EOCD block. It's followed by 447 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We 448 * need to read the last part of the file into a buffer, dig through 449 * it to find the magic number, parse some values out, and use those 450 * to determine the extent of the CD. 451 * 452 * We start by pulling in the last part of the file. 453 */ 454 uint32_t read_amount = kMaxEOCDSearch; 455 if (file_length < (off64_t) read_amount) { 456 read_amount = file_length; 457 } 458 459 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount); 460 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive, 461 file_length, read_amount, scan_buffer); 462 463 free(scan_buffer); 464 return result; 465} 466 467/* 468 * Parses the Zip archive's Central Directory. Allocates and populates the 469 * hash table. 470 * 471 * Returns 0 on success. 
472 */ 473static int32_t ParseZipArchive(ZipArchive* archive) { 474 int32_t result = -1; 475 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr(); 476 size_t cd_length = archive->directory_map->getDataLength(); 477 uint16_t num_entries = archive->num_entries; 478 479 /* 480 * Create hash table. We have a minimum 75% load factor, possibly as 481 * low as 50% after we round off to a power of 2. There must be at 482 * least one unused entry to avoid an infinite loop during creation. 483 */ 484 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3); 485 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size, 486 sizeof(ZipEntryName)); 487 488 /* 489 * Walk through the central directory, adding entries to the hash 490 * table and verifying values. 491 */ 492 const uint8_t* ptr = cd_ptr; 493 for (uint16_t i = 0; i < num_entries; i++) { 494 if (get4LE(ptr) != kCDESignature) { 495 ALOGW("Zip: missed a central dir sig (at %d)", i); 496 goto bail; 497 } 498 499 if (ptr + kCDELen > cd_ptr + cd_length) { 500 ALOGW("Zip: ran off the end (at %d)", i); 501 goto bail; 502 } 503 504 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 505 if (local_header_offset >= archive->directory_offset) { 506 ALOGW("Zip: bad LFH offset %" PRId64 " at entry %d", local_header_offset, i); 507 goto bail; 508 } 509 510 const uint16_t file_name_length = get2LE(ptr + kCDENameLen); 511 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen); 512 const uint16_t comment_length = get2LE(ptr + kCDECommentLen); 513 514 /* add the CDE filename to the hash table */ 515 const int add_result = AddToHash(archive->hash_table, 516 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length); 517 if (add_result) { 518 ALOGW("Zip: Error adding entry to hash table %d", add_result); 519 result = add_result; 520 goto bail; 521 } 522 523 ptr += kCDELen + file_name_length + extra_length + comment_length; 524 if ((size_t)(ptr - cd_ptr) > 
cd_length) { 525 ALOGW("Zip: bad CD advance (%zu vs %zu) at entry %d", 526 (size_t) (ptr - cd_ptr), cd_length, i); 527 goto bail; 528 } 529 } 530 ALOGV("+++ zip good scan %d entries", num_entries); 531 532 result = 0; 533 534bail: 535 return result; 536} 537 538static int32_t OpenArchiveInternal(ZipArchive* archive, 539 const char* debug_file_name) { 540 int32_t result = -1; 541 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) { 542 return result; 543 } 544 545 if ((result = ParseZipArchive(archive))) { 546 return result; 547 } 548 549 return 0; 550} 551 552int32_t OpenArchiveFd(int fd, const char* debug_file_name, 553 ZipArchiveHandle* handle) { 554 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 555 memset(archive, 0, sizeof(*archive)); 556 *handle = archive; 557 558 archive->fd = fd; 559 560 return OpenArchiveInternal(archive, debug_file_name); 561} 562 563int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) { 564 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 565 memset(archive, 0, sizeof(*archive)); 566 *handle = archive; 567 568 const int fd = open(fileName, O_RDONLY | O_BINARY, 0); 569 if (fd < 0) { 570 ALOGW("Unable to open '%s': %s", fileName, strerror(errno)); 571 return kIoError; 572 } else { 573 archive->fd = fd; 574 } 575 576 return OpenArchiveInternal(archive, fileName); 577} 578 579/* 580 * Close a ZipArchive, closing the file and freeing the contents. 
581 */ 582void CloseArchive(ZipArchiveHandle handle) { 583 ZipArchive* archive = (ZipArchive*) handle; 584 ALOGV("Closing archive %p", archive); 585 586 if (archive->fd >= 0) { 587 close(archive->fd); 588 } 589 590 if (archive->directory_map != NULL) { 591 archive->directory_map->release(); 592 } 593 free(archive->hash_table); 594 free(archive); 595} 596 597static int32_t UpdateEntryFromDataDescriptor(int fd, 598 ZipEntry *entry) { 599 uint8_t ddBuf[kDDMaxLen]; 600 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf))); 601 if (actual != sizeof(ddBuf)) { 602 return kIoError; 603 } 604 605 const uint32_t ddSignature = get4LE(ddBuf); 606 uint16_t ddOffset = 0; 607 if (ddSignature == kDDOptSignature) { 608 ddOffset = 4; 609 } 610 611 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32); 612 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen); 613 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen); 614 615 return 0; 616} 617 618// Attempts to read |len| bytes into |buf| at offset |off|. 619// 620// This method uses pread64 on platforms that support it and 621// lseek64 + read on platforms that don't. This implies that 622// callers should not rely on the |fd| offset being incremented 623// as a side effect of this call. 624static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len, 625 off64_t off) { 626#ifdef HAVE_PREAD 627 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off)); 628#else 629 // The only supported platform that doesn't support pread at the moment 630 // is Windows. Only recent versions of windows support unix like forks, 631 // and even there the semantics are quite different. 
632 if (lseek64(fd, off, SEEK_SET) != off) { 633 ALOGW("Zip: failed seek to offset %" PRId64, off); 634 return kIoError; 635 } 636 637 return TEMP_FAILURE_RETRY(read(fd, buf, len)); 638#endif // HAVE_PREAD 639} 640 641static int32_t FindEntry(const ZipArchive* archive, const int ent, 642 ZipEntry* data) { 643 const uint16_t nameLen = archive->hash_table[ent].name_length; 644 const char* name = archive->hash_table[ent].name; 645 646 // Recover the start of the central directory entry from the filename 647 // pointer. The filename is the first entry past the fixed-size data, 648 // so we can just subtract back from that. 649 const unsigned char* ptr = (const unsigned char*) name; 650 ptr -= kCDELen; 651 652 // This is the base of our mmapped region, we have to sanity check that 653 // the name that's in the hash table is a pointer to a location within 654 // this mapped region. 655 const unsigned char* base_ptr = (const unsigned char*) 656 archive->directory_map->getDataPtr(); 657 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) { 658 ALOGW("Zip: Invalid entry pointer"); 659 return kInvalidOffset; 660 } 661 662 // The offset of the start of the central directory in the zipfile. 663 // We keep this lying around so that we can sanity check all our lengths 664 // and our per-file structures. 665 const off64_t cd_offset = archive->directory_offset; 666 667 // Fill out the compression method, modification time, crc32 668 // and other interesting attributes from the central directory. These 669 // will later be compared against values from the local file header. 670 data->method = get2LE(ptr + kCDEMethod); 671 data->mod_time = get4LE(ptr + kCDEModWhen); 672 data->crc32 = get4LE(ptr + kCDECRC); 673 data->compressed_length = get4LE(ptr + kCDECompLen); 674 data->uncompressed_length = get4LE(ptr + kCDEUncompLen); 675 676 // Figure out the local header offset from the central directory. 
The 677 // actual file data will begin after the local header and the name / 678 // extra comments. 679 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 680 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) { 681 ALOGW("Zip: bad local hdr offset in zip"); 682 return kInvalidOffset; 683 } 684 685 uint8_t lfh_buf[kLFHLen]; 686 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf), 687 local_header_offset); 688 if (actual != sizeof(lfh_buf)) { 689 ALOGW("Zip: failed reading lfh name from offset %" PRId64, local_header_offset); 690 return kIoError; 691 } 692 693 if (get4LE(lfh_buf) != kLFHSignature) { 694 ALOGW("Zip: didn't find signature at start of lfh, offset=%" PRId64, 695 local_header_offset); 696 return kInvalidOffset; 697 } 698 699 // Paranoia: Match the values specified in the local file header 700 // to those specified in the central directory. 701 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags); 702 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen); 703 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen); 704 705 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) { 706 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC); 707 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen); 708 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen); 709 710 data->has_data_descriptor = 0; 711 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen 712 || data->crc32 != lfhCrc) { 713 ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}", 714 data->compressed_length, data->uncompressed_length, data->crc32, 715 lfhCompLen, lfhUncompLen, lfhCrc); 716 return kInconsistentInformation; 717 } 718 } else { 719 data->has_data_descriptor = 1; 720 } 721 722 // Check that the local file header name matches the declared 723 // name in the central directory. 
724 if (lfhNameLen == nameLen) { 725 const off64_t name_offset = local_header_offset + kLFHLen; 726 if (name_offset + lfhNameLen >= cd_offset) { 727 ALOGW("Zip: Invalid declared length"); 728 return kInvalidOffset; 729 } 730 731 uint8_t* name_buf = (uint8_t*) malloc(nameLen); 732 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen, 733 name_offset); 734 735 if (actual != nameLen) { 736 ALOGW("Zip: failed reading lfh name from offset %" PRId64, name_offset); 737 free(name_buf); 738 return kIoError; 739 } 740 741 if (memcmp(name, name_buf, nameLen)) { 742 free(name_buf); 743 return kInconsistentInformation; 744 } 745 746 free(name_buf); 747 } else { 748 ALOGW("Zip: lfh name did not match central directory."); 749 return kInconsistentInformation; 750 } 751 752 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen; 753 if (data_offset > cd_offset) { 754 ALOGW("Zip: bad data offset %" PRId64 " in zip", data_offset); 755 return kInvalidOffset; 756 } 757 758 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) { 759 ALOGW("Zip: bad compressed length in zip (%" PRId64 " + %zd > %" PRId64 ")", 760 data_offset, data->compressed_length, cd_offset); 761 return kInvalidOffset; 762 } 763 764 if (data->method == kCompressStored && 765 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) { 766 ALOGW("Zip: bad uncompressed length in zip (%" PRId64 " + %d > %" PRId64 ")", 767 data_offset, data->uncompressed_length, cd_offset); 768 return kInvalidOffset; 769 } 770 771 data->offset = data_offset; 772 return 0; 773} 774 775struct IterationHandle { 776 uint32_t position; 777 const char* prefix; 778 uint16_t prefix_len; 779 ZipArchive* archive; 780}; 781 782int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) { 783 ZipArchive* archive = (ZipArchive *) handle; 784 785 if (archive == NULL || archive->hash_table == NULL) { 786 ALOGW("Zip: Invalid ZipArchiveHandle"); 787 return kInvalidHandle; 
788 } 789 790 IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle)); 791 cookie->position = 0; 792 cookie->prefix = prefix; 793 cookie->archive = archive; 794 if (prefix != NULL) { 795 cookie->prefix_len = strlen(prefix); 796 } 797 798 *cookie_ptr = cookie ; 799 return 0; 800} 801 802int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName, 803 ZipEntry* data) { 804 const ZipArchive* archive = (ZipArchive*) handle; 805 const int nameLen = strlen(entryName); 806 if (nameLen == 0 || nameLen > 65535) { 807 ALOGW("Zip: Invalid filename %s", entryName); 808 return kInvalidEntryName; 809 } 810 811 const int64_t ent = EntryToIndex(archive->hash_table, 812 archive->hash_table_size, entryName, nameLen); 813 814 if (ent < 0) { 815 ALOGV("Zip: Could not find entry %.*s", nameLen, entryName); 816 return ent; 817 } 818 819 return FindEntry(archive, ent, data); 820} 821 822int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) { 823 IterationHandle* handle = (IterationHandle *) cookie; 824 if (handle == NULL) { 825 return kInvalidHandle; 826 } 827 828 ZipArchive* archive = handle->archive; 829 if (archive == NULL || archive->hash_table == NULL) { 830 ALOGW("Zip: Invalid ZipArchiveHandle"); 831 return kInvalidHandle; 832 } 833 834 const uint32_t currentOffset = handle->position; 835 const uint32_t hash_table_length = archive->hash_table_size; 836 const ZipEntryName *hash_table = archive->hash_table; 837 838 for (uint32_t i = currentOffset; i < hash_table_length; ++i) { 839 if (hash_table[i].name != NULL && 840 (handle->prefix == NULL || 841 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) { 842 handle->position = (i + 1); 843 const int error = FindEntry(archive, i, data); 844 if (!error) { 845 name->name = hash_table[i].name; 846 name->name_length = hash_table[i].name_length; 847 } 848 849 return error; 850 } 851 } 852 853 handle->position = 0; 854 return kIterationEnd; 855} 856 857static int32_t 
InflateToFile(int fd, const ZipEntry* entry, 858 uint8_t* begin, uint32_t length, 859 uint64_t* crc_out) { 860 int32_t result = -1; 861 const uint32_t kBufSize = 32768; 862 uint8_t read_buf[kBufSize]; 863 uint8_t write_buf[kBufSize]; 864 z_stream zstream; 865 int zerr; 866 867 /* 868 * Initialize the zlib stream struct. 869 */ 870 memset(&zstream, 0, sizeof(zstream)); 871 zstream.zalloc = Z_NULL; 872 zstream.zfree = Z_NULL; 873 zstream.opaque = Z_NULL; 874 zstream.next_in = NULL; 875 zstream.avail_in = 0; 876 zstream.next_out = (Bytef*) write_buf; 877 zstream.avail_out = kBufSize; 878 zstream.data_type = Z_UNKNOWN; 879 880 /* 881 * Use the undocumented "negative window bits" feature to tell zlib 882 * that there's no zlib header waiting for it. 883 */ 884 zerr = inflateInit2(&zstream, -MAX_WBITS); 885 if (zerr != Z_OK) { 886 if (zerr == Z_VERSION_ERROR) { 887 ALOGE("Installed zlib is not compatible with linked version (%s)", 888 ZLIB_VERSION); 889 } else { 890 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr); 891 } 892 893 return kZlibError; 894 } 895 896 const uint32_t uncompressed_length = entry->uncompressed_length; 897 898 uint32_t compressed_length = entry->compressed_length; 899 uint32_t write_count = 0; 900 do { 901 /* read as much as we can */ 902 if (zstream.avail_in == 0) { 903 const ssize_t getSize = (compressed_length > kBufSize) ? 
kBufSize : compressed_length; 904 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize)); 905 if (actual != getSize) { 906 ALOGW("Zip: inflate read failed (%zd vs %zd)", actual, getSize); 907 result = kIoError; 908 goto z_bail; 909 } 910 911 compressed_length -= getSize; 912 913 zstream.next_in = read_buf; 914 zstream.avail_in = getSize; 915 } 916 917 /* uncompress the data */ 918 zerr = inflate(&zstream, Z_NO_FLUSH); 919 if (zerr != Z_OK && zerr != Z_STREAM_END) { 920 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", 921 zerr, zstream.next_in, zstream.avail_in, 922 zstream.next_out, zstream.avail_out); 923 result = kZlibError; 924 goto z_bail; 925 } 926 927 /* write when we're full or when we're done */ 928 if (zstream.avail_out == 0 || 929 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) { 930 const size_t write_size = zstream.next_out - write_buf; 931 // The file might have declared a bogus length. 932 if (write_size + write_count > length) { 933 goto z_bail; 934 } 935 memcpy(begin + write_count, write_buf, write_size); 936 write_count += write_size; 937 938 zstream.next_out = write_buf; 939 zstream.avail_out = kBufSize; 940 } 941 } while (zerr == Z_OK); 942 943 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 944 945 // stream.adler holds the crc32 value for such streams. 
946 *crc_out = zstream.adler; 947 948 if (zstream.total_out != uncompressed_length || compressed_length != 0) { 949 ALOGW("Zip: size mismatch on inflated file (%ld vs %u)", 950 zstream.total_out, uncompressed_length); 951 result = kInconsistentInformation; 952 goto z_bail; 953 } 954 955 result = 0; 956 957z_bail: 958 inflateEnd(&zstream); /* free up any allocated structures */ 959 960 return result; 961} 962 963int32_t ExtractToMemory(ZipArchiveHandle handle, 964 ZipEntry* entry, uint8_t* begin, uint32_t size) { 965 ZipArchive* archive = (ZipArchive*) handle; 966 const uint16_t method = entry->method; 967 off64_t data_offset = entry->offset; 968 969 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) { 970 ALOGW("Zip: lseek to data at %" PRId64 " failed", data_offset); 971 return kIoError; 972 } 973 974 // this should default to kUnknownCompressionMethod. 975 int32_t return_value = -1; 976 uint64_t crc = 0; 977 if (method == kCompressStored) { 978 return_value = CopyFileToFile(archive->fd, begin, size, &crc); 979 } else if (method == kCompressDeflated) { 980 return_value = InflateToFile(archive->fd, entry, begin, size, &crc); 981 } 982 983 if (!return_value && entry->has_data_descriptor) { 984 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry); 985 if (return_value) { 986 return return_value; 987 } 988 } 989 990 // TODO: Fix this check by passing the right flags to inflate2 so that 991 // it calculates the CRC for us. 
992 if (entry->crc32 != crc && false) { 993 ALOGW("Zip: crc mismatch: expected %u, was %" PRIu64, entry->crc32, crc); 994 return kInconsistentInformation; 995 } 996 997 return return_value; 998} 999 1000int32_t ExtractEntryToFile(ZipArchiveHandle handle, 1001 ZipEntry* entry, int fd) { 1002 const int32_t declared_length = entry->uncompressed_length; 1003 1004 const off64_t current_offset = lseek64(fd, 0, SEEK_CUR); 1005 if (current_offset == -1) { 1006 ALOGW("Zip: unable to seek to current location on fd %d: %s", fd, 1007 strerror(errno)); 1008 return kIoError; 1009 } 1010 1011 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length + current_offset)); 1012 if (result == -1) { 1013 ALOGW("Zip: unable to truncate file to %" PRId64 ": %s", 1014 declared_length + current_offset, strerror(errno)); 1015 return kIoError; 1016 } 1017 1018 // Don't attempt to map a region of length 0. We still need the 1019 // ftruncate() though, since the API guarantees that we will truncate 1020 // the file to the end of the uncompressed output. 1021 if (declared_length == 0) { 1022 return 0; 1023 } 1024 1025 android::FileMap* map = MapFileSegment(fd, current_offset, declared_length, 1026 false, kTempMappingFileName); 1027 if (map == NULL) { 1028 return kMmapFailed; 1029 } 1030 1031 const int32_t error = ExtractToMemory(handle, entry, 1032 reinterpret_cast<uint8_t*>(map->getDataPtr()), 1033 map->getDataLength()); 1034 map->release(); 1035 return error; 1036} 1037 1038const char* ErrorCodeString(int32_t error_code) { 1039 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) { 1040 return kErrorMessages[error_code * -1]; 1041 } 1042 1043 return kErrorMessages[0]; 1044} 1045 1046int GetFileDescriptor(const ZipArchiveHandle handle) { 1047 return ((ZipArchive*) handle)->fd; 1048} 1049 1050