// zip_archive.cc revision eaf988532b9e603b1599b7750bfa923fbb39d297
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * Read-only access to Zip archives, with minimal heap allocation. 19 */ 20#include "ziparchive/zip_archive.h" 21 22#include <zlib.h> 23 24#include <assert.h> 25#include <errno.h> 26#include <limits.h> 27#include <log/log.h> 28#include <fcntl.h> 29#include <stdlib.h> 30#include <string.h> 31#include <unistd.h> 32#include <utils/FileMap.h> 33 34#include <JNIHelp.h> // TEMP_FAILURE_RETRY may or may not be in unistd 35 36// This is for windows. If we don't open a file in binary mode, weirds 37// things will happen. 38#ifndef O_BINARY 39#define O_BINARY 0 40#endif 41 42/* 43 * Zip file constants. 
44 */ 45static const uint32_t kEOCDSignature = 0x06054b50; 46static const uint32_t kEOCDLen = 2; 47static const uint32_t kEOCDNumEntries = 8; // offset to #of entries in file 48static const uint32_t kEOCDSize = 12; // size of the central directory 49static const uint32_t kEOCDFileOffset = 16; // offset to central directory 50 51static const uint32_t kMaxCommentLen = 65535; // longest possible in ushort 52static const uint32_t kMaxEOCDSearch = (kMaxCommentLen + kEOCDLen); 53 54static const uint32_t kLFHSignature = 0x04034b50; 55static const uint32_t kLFHLen = 30; // excluding variable-len fields 56static const uint32_t kLFHGPBFlags = 6; // general purpose bit flags 57static const uint32_t kLFHCRC = 14; // offset to CRC 58static const uint32_t kLFHCompLen = 18; // offset to compressed length 59static const uint32_t kLFHUncompLen = 22; // offset to uncompressed length 60static const uint32_t kLFHNameLen = 26; // offset to filename length 61static const uint32_t kLFHExtraLen = 28; // offset to extra length 62 63static const uint32_t kCDESignature = 0x02014b50; 64static const uint32_t kCDELen = 46; // excluding variable-len fields 65static const uint32_t kCDEMethod = 10; // offset to compression method 66static const uint32_t kCDEModWhen = 12; // offset to modification timestamp 67static const uint32_t kCDECRC = 16; // offset to entry CRC 68static const uint32_t kCDECompLen = 20; // offset to compressed length 69static const uint32_t kCDEUncompLen = 24; // offset to uncompressed length 70static const uint32_t kCDENameLen = 28; // offset to filename length 71static const uint32_t kCDEExtraLen = 30; // offset to extra length 72static const uint32_t kCDECommentLen = 32; // offset to comment length 73static const uint32_t kCDELocalOffset = 42; // offset to local hdr 74 75static const uint32_t kDDOptSignature = 0x08074b50; // *OPTIONAL* data descriptor signature 76static const uint32_t kDDSignatureLen = 4; 77static const uint32_t kDDLen = 12; 78static const uint32_t 
kDDMaxLen = 16; // max of 16 bytes with a signature, 12 bytes without 79static const uint32_t kDDCrc32 = 0; // offset to crc32 80static const uint32_t kDDCompLen = 4; // offset to compressed length 81static const uint32_t kDDUncompLen = 8; // offset to uncompressed length 82 83static const uint32_t kGPBDDFlagMask = 0x0008; // mask value that signifies that the entry has a DD 84 85static const uint32_t kMaxErrorLen = 1024; 86 87static const char* kErrorMessages[] = { 88 "Unknown return code.", 89 "Iteration ended", 90 "Zlib error", 91 "Invalid file", 92 "Invalid handle", 93 "Duplicate entries in archive", 94 "Empty archive", 95 "Entry not found", 96 "Invalid offset", 97 "Inconsistent information", 98 "Invalid entry name", 99 "I/O Error", 100 "File mapping failed" 101}; 102 103static const int32_t kErrorMessageUpperBound = 0; 104 105static const int32_t kIterationEnd = -1; 106 107// We encountered a Zlib error when inflating a stream from this file. 108// Usually indicates file corruption. 109static const int32_t kZlibError = -2; 110 111// The input file cannot be processed as a zip archive. Usually because 112// it's too small, too large or does not have a valid signature. 113static const int32_t kInvalidFile = -3; 114 115// An invalid iteration / ziparchive handle was passed in as an input 116// argument. 117static const int32_t kInvalidHandle = -4; 118 119// The zip archive contained two (or possibly more) entries with the same 120// name. 121static const int32_t kDuplicateEntry = -5; 122 123// The zip archive contains no entries. 124static const int32_t kEmptyArchive = -6; 125 126// The specified entry was not found in the archive. 127static const int32_t kEntryNotFound = -7; 128 129// The zip archive contained an invalid local file header pointer. 130static const int32_t kInvalidOffset = -8; 131 132// The zip archive contained inconsistent entry information. 
This could 133// be because the central directory & local file header did not agree, or 134// if the actual uncompressed length or crc32 do not match their declared 135// values. 136static const int32_t kInconsistentInformation = -9; 137 138// An invalid entry name was encountered. 139static const int32_t kInvalidEntryName = -10; 140 141// An I/O related system call (read, lseek, ftruncate, map) failed. 142static const int32_t kIoError = -11; 143 144// We were not able to mmap the central directory or entry contents. 145static const int32_t kMmapFailed = -12; 146 147static const int32_t kErrorMessageLowerBound = -13; 148 149static const char kTempMappingFileName[] = "zip: ExtractFileToFile"; 150 151/* 152 * A Read-only Zip archive. 153 * 154 * We want "open" and "find entry by name" to be fast operations, and 155 * we want to use as little memory as possible. We memory-map the zip 156 * central directory, and load a hash table with pointers to the filenames 157 * (which aren't null-terminated). The other fields are at a fixed offset 158 * from the filename, so we don't need to extract those (but we do need 159 * to byte-read and endian-swap them every time we want them). 160 * 161 * It's possible that somebody has handed us a massive (~1GB) zip archive, 162 * so we can't expect to mmap the entire file. 163 * 164 * To speed comparisons when doing a lookup by name, we could make the mapping 165 * "private" (copy-on-write) and null-terminate the filenames after verifying 166 * the record structure. However, this requires a private mapping of 167 * every page that the Central Directory touches. Easier to tuck a copy 168 * of the string length into the hash table entry. 
169 */ 170struct ZipArchive { 171 /* open Zip archive */ 172 int fd; 173 174 /* mapped central directory area */ 175 off64_t directory_offset; 176 android::FileMap* directory_map; 177 178 /* number of entries in the Zip archive */ 179 uint16_t num_entries; 180 181 /* 182 * We know how many entries are in the Zip archive, so we can have a 183 * fixed-size hash table. We define a load factor of 0.75 and overallocat 184 * so the maximum number entries can never be higher than 185 * ((4 * UINT16_MAX) / 3 + 1) which can safely fit into a uint32_t. 186 */ 187 uint32_t hash_table_size; 188 ZipEntryName* hash_table; 189}; 190 191// Returns 0 on success and negative values on failure. 192static android::FileMap* MapFileSegment(const int fd, const off64_t start, 193 const size_t length, const bool read_only, 194 const char* debug_file_name) { 195 android::FileMap* file_map = new android::FileMap; 196 const bool success = file_map->create(debug_file_name, fd, start, length, read_only); 197 if (!success) { 198 file_map->release(); 199 return NULL; 200 } 201 202 return file_map; 203} 204 205static int32_t CopyFileToFile(int fd, uint8_t* begin, const uint32_t length, uint64_t *crc_out) { 206 static const uint32_t kBufSize = 32768; 207 uint8_t buf[kBufSize]; 208 209 uint32_t count = 0; 210 uint64_t crc = 0; 211 while (count < length) { 212 uint32_t remaining = length - count; 213 214 // Safe conversion because kBufSize is narrow enough for a 32 bit signed 215 // value. 216 ssize_t get_size = (remaining > kBufSize) ? kBufSize : remaining; 217 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, buf, get_size)); 218 219 if (actual != get_size) { 220 ALOGW("CopyFileToFile: copy read failed (%d vs %zd)", 221 (int) actual, get_size); 222 return kIoError; 223 } 224 225 memcpy(begin + count, buf, get_size); 226 crc = crc32(crc, buf, get_size); 227 count += get_size; 228 } 229 230 *crc_out = crc; 231 232 return 0; 233} 234 235/* 236 * Round up to the next highest power of 2. 
237 * 238 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 239 */ 240static uint32_t RoundUpPower2(uint32_t val) { 241 val--; 242 val |= val >> 1; 243 val |= val >> 2; 244 val |= val >> 4; 245 val |= val >> 8; 246 val |= val >> 16; 247 val++; 248 249 return val; 250} 251 252static uint32_t ComputeHash(const char* str, uint16_t len) { 253 uint32_t hash = 0; 254 255 while (len--) { 256 hash = hash * 31 + *str++; 257 } 258 259 return hash; 260} 261 262/* 263 * Convert a ZipEntry to a hash table index, verifying that it's in a 264 * valid range. 265 */ 266static int64_t EntryToIndex(const ZipEntryName* hash_table, 267 const uint32_t hash_table_size, 268 const char* name, uint16_t length) { 269 const uint32_t hash = ComputeHash(name, length); 270 271 // NOTE: (hash_table_size - 1) is guaranteed to be non-negative. 272 uint32_t ent = hash & (hash_table_size - 1); 273 while (hash_table[ent].name != NULL) { 274 if (hash_table[ent].name_length == length && 275 memcmp(hash_table[ent].name, name, length) == 0) { 276 return ent; 277 } 278 279 ent = (ent + 1) & (hash_table_size - 1); 280 } 281 282 ALOGV("Zip: Unable to find entry %.*s", name_length, name); 283 return kEntryNotFound; 284} 285 286/* 287 * Add a new entry to the hash table. 288 */ 289static int32_t AddToHash(ZipEntryName *hash_table, const uint64_t hash_table_size, 290 const char* name, uint16_t length) { 291 const uint64_t hash = ComputeHash(name, length); 292 uint32_t ent = hash & (hash_table_size - 1); 293 294 /* 295 * We over-allocated the table, so we're guaranteed to find an empty slot. 296 * Further, we guarantee that the hashtable size is not 0. 297 */ 298 while (hash_table[ent].name != NULL) { 299 if (hash_table[ent].name_length == length && 300 memcmp(hash_table[ent].name, name, length) == 0) { 301 // We've found a duplicate entry. 
We don't accept it 302 ALOGW("Zip: Found duplicate entry %.*s", length, name); 303 return kDuplicateEntry; 304 } 305 ent = (ent + 1) & (hash_table_size - 1); 306 } 307 308 hash_table[ent].name = name; 309 hash_table[ent].name_length = length; 310 return 0; 311} 312 313/* 314 * Get 2 little-endian bytes. 315 */ 316static uint16_t get2LE(const uint8_t* src) { 317 return src[0] | (src[1] << 8); 318} 319 320/* 321 * Get 4 little-endian bytes. 322 */ 323static uint32_t get4LE(const uint8_t* src) { 324 uint32_t result; 325 326 result = src[0]; 327 result |= src[1] << 8; 328 result |= src[2] << 16; 329 result |= src[3] << 24; 330 331 return result; 332} 333 334static int32_t MapCentralDirectory0(int fd, const char* debug_file_name, 335 ZipArchive* archive, off64_t file_length, 336 uint32_t read_amount, uint8_t* scan_buffer) { 337 const off64_t search_start = file_length - read_amount; 338 339 if (lseek64(fd, search_start, SEEK_SET) != search_start) { 340 ALOGW("Zip: seek %lld failed: %s", search_start, strerror(errno)); 341 return kIoError; 342 } 343 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, scan_buffer, read_amount)); 344 if (actual != (ssize_t) read_amount) { 345 ALOGW("Zip: read %zd failed: %s", read_amount, strerror(errno)); 346 return kIoError; 347 } 348 349 /* 350 * Scan backward for the EOCD magic. In an archive without a trailing 351 * comment, we'll find it on the first try. (We may want to consider 352 * doing an initial minimal read; if we don't find it, retry with a 353 * second read as above.) 
354 */ 355 int i; 356 for (i = read_amount - kEOCDLen; i >= 0; i--) { 357 if (scan_buffer[i] == 0x50 && get4LE(&scan_buffer[i]) == kEOCDSignature) { 358 ALOGV("+++ Found EOCD at buf+%d", i); 359 break; 360 } 361 } 362 if (i < 0) { 363 ALOGD("Zip: EOCD not found, %s is not zip", debug_file_name); 364 return kInvalidFile; 365 } 366 367 const off64_t eocd_offset = search_start + i; 368 const uint8_t* eocd_ptr = scan_buffer + i; 369 370 assert(eocd_offset < file_length); 371 372 /* 373 * Grab the CD offset and size, and the number of entries in the 374 * archive. Verify that they look reasonable. Widen dir_size and 375 * dir_offset to the file offset type. 376 */ 377 const uint16_t num_entries = get2LE(eocd_ptr + kEOCDNumEntries); 378 const off64_t dir_size = get4LE(eocd_ptr + kEOCDSize); 379 const off64_t dir_offset = get4LE(eocd_ptr + kEOCDFileOffset); 380 381 if (dir_offset + dir_size > eocd_offset) { 382 ALOGW("Zip: bad offsets (dir %lld, size %lld, eocd %lld)", 383 dir_offset, dir_size, eocd_offset); 384 return kInvalidOffset; 385 } 386 if (num_entries == 0) { 387 ALOGW("Zip: empty archive?"); 388 return kEmptyArchive; 389 } 390 391 ALOGV("+++ num_entries=%d dir_size=%d dir_offset=%d", num_entries, dir_size, 392 dir_offset); 393 394 /* 395 * It all looks good. Create a mapping for the CD, and set the fields 396 * in archive. 397 */ 398 android::FileMap* map = MapFileSegment(fd, dir_offset, dir_size, 399 true /* read only */, debug_file_name); 400 if (map == NULL) { 401 archive->directory_map = NULL; 402 return kMmapFailed; 403 } 404 405 archive->directory_map = map; 406 archive->num_entries = num_entries; 407 archive->directory_offset = dir_offset; 408 409 return 0; 410} 411 412/* 413 * Find the zip Central Directory and memory-map it. 
414 * 415 * On success, returns 0 after populating fields from the EOCD area: 416 * directory_offset 417 * directory_map 418 * num_entries 419 */ 420static int32_t MapCentralDirectory(int fd, const char* debug_file_name, 421 ZipArchive* archive) { 422 423 // Test file length. We use lseek64 to make sure the file 424 // is small enough to be a zip file (Its size must be less than 425 // 0xffffffff bytes). 426 off64_t file_length = lseek64(fd, 0, SEEK_END); 427 if (file_length == -1) { 428 ALOGV("Zip: lseek on fd %d failed", fd); 429 return kInvalidFile; 430 } 431 432 if (file_length > (off64_t) 0xffffffff) { 433 ALOGV("Zip: zip file too long %d", file_length); 434 return kInvalidFile; 435 } 436 437 if (file_length < (int64_t) kEOCDLen) { 438 ALOGV("Zip: length %ld is too small to be zip", file_length); 439 return kInvalidFile; 440 } 441 442 /* 443 * Perform the traditional EOCD snipe hunt. 444 * 445 * We're searching for the End of Central Directory magic number, 446 * which appears at the start of the EOCD block. It's followed by 447 * 18 bytes of EOCD stuff and up to 64KB of archive comment. We 448 * need to read the last part of the file into a buffer, dig through 449 * it to find the magic number, parse some values out, and use those 450 * to determine the extent of the CD. 451 * 452 * We start by pulling in the last part of the file. 453 */ 454 uint32_t read_amount = kMaxEOCDSearch; 455 if (file_length < (off64_t) read_amount) { 456 read_amount = file_length; 457 } 458 459 uint8_t* scan_buffer = (uint8_t*) malloc(read_amount); 460 int32_t result = MapCentralDirectory0(fd, debug_file_name, archive, 461 file_length, read_amount, scan_buffer); 462 463 free(scan_buffer); 464 return result; 465} 466 467/* 468 * Parses the Zip archive's Central Directory. Allocates and populates the 469 * hash table. 470 * 471 * Returns 0 on success. 
472 */ 473static int32_t ParseZipArchive(ZipArchive* archive) { 474 int32_t result = -1; 475 const uint8_t* cd_ptr = (const uint8_t*) archive->directory_map->getDataPtr(); 476 size_t cd_length = archive->directory_map->getDataLength(); 477 uint16_t num_entries = archive->num_entries; 478 479 /* 480 * Create hash table. We have a minimum 75% load factor, possibly as 481 * low as 50% after we round off to a power of 2. There must be at 482 * least one unused entry to avoid an infinite loop during creation. 483 */ 484 archive->hash_table_size = RoundUpPower2(1 + (num_entries * 4) / 3); 485 archive->hash_table = (ZipEntryName*) calloc(archive->hash_table_size, 486 sizeof(ZipEntryName)); 487 488 /* 489 * Walk through the central directory, adding entries to the hash 490 * table and verifying values. 491 */ 492 const uint8_t* ptr = cd_ptr; 493 for (uint16_t i = 0; i < num_entries; i++) { 494 if (get4LE(ptr) != kCDESignature) { 495 ALOGW("Zip: missed a central dir sig (at %d)", i); 496 goto bail; 497 } 498 499 if (ptr + kCDELen > cd_ptr + cd_length) { 500 ALOGW("Zip: ran off the end (at %d)", i); 501 goto bail; 502 } 503 504 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 505 if (local_header_offset >= archive->directory_offset) { 506 ALOGW("Zip: bad LFH offset %lld at entry %d", local_header_offset, i); 507 goto bail; 508 } 509 510 const uint16_t file_name_length = get2LE(ptr + kCDENameLen); 511 const uint16_t extra_length = get2LE(ptr + kCDEExtraLen); 512 const uint16_t comment_length = get2LE(ptr + kCDECommentLen); 513 514 /* add the CDE filename to the hash table */ 515 const int add_result = AddToHash(archive->hash_table, 516 archive->hash_table_size, (const char*) ptr + kCDELen, file_name_length); 517 if (add_result) { 518 ALOGW("Zip: Error adding entry to hash table %d", add_result); 519 result = add_result; 520 goto bail; 521 } 522 523 ptr += kCDELen + file_name_length + extra_length + comment_length; 524 if ((size_t)(ptr - cd_ptr) > cd_length) 
{ 525 ALOGW("Zip: bad CD advance (%d vs %zd) at entry %d", 526 (int) (ptr - cd_ptr), cd_length, i); 527 goto bail; 528 } 529 } 530 ALOGV("+++ zip good scan %d entries", num_entries); 531 532 result = 0; 533 534bail: 535 return result; 536} 537 538static int32_t OpenArchiveInternal(ZipArchive* archive, 539 const char* debug_file_name) { 540 int32_t result = -1; 541 if ((result = MapCentralDirectory(archive->fd, debug_file_name, archive))) { 542 return result; 543 } 544 545 if ((result = ParseZipArchive(archive))) { 546 return result; 547 } 548 549 return 0; 550} 551 552int32_t OpenArchiveFd(int fd, const char* debug_file_name, 553 ZipArchiveHandle* handle) { 554 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 555 memset(archive, 0, sizeof(*archive)); 556 *handle = archive; 557 558 archive->fd = fd; 559 560 return OpenArchiveInternal(archive, debug_file_name); 561} 562 563int32_t OpenArchive(const char* fileName, ZipArchiveHandle* handle) { 564 ZipArchive* archive = (ZipArchive*) malloc(sizeof(ZipArchive)); 565 memset(archive, 0, sizeof(*archive)); 566 *handle = archive; 567 568 const int fd = open(fileName, O_RDONLY | O_BINARY, 0); 569 if (fd < 0) { 570 ALOGW("Unable to open '%s': %s", fileName, strerror(errno)); 571 return kIoError; 572 } else { 573 archive->fd = fd; 574 } 575 576 return OpenArchiveInternal(archive, fileName); 577} 578 579/* 580 * Close a ZipArchive, closing the file and freeing the contents. 
581 */ 582void CloseArchive(ZipArchiveHandle handle) { 583 ZipArchive* archive = (ZipArchive*) handle; 584 ALOGV("Closing archive %p", archive); 585 586 if (archive->fd >= 0) { 587 close(archive->fd); 588 } 589 590 if (archive->directory_map != NULL) { 591 archive->directory_map->release(); 592 } 593 free(archive->hash_table); 594 595 /* ensure nobody tries to use the ZipArchive after it's closed */ 596 archive->directory_offset = -1; 597 archive->fd = -1; 598 archive->num_entries = -1; 599 archive->hash_table_size = -1; 600 archive->hash_table = NULL; 601} 602 603static int32_t UpdateEntryFromDataDescriptor(int fd, 604 ZipEntry *entry) { 605 uint8_t ddBuf[kDDMaxLen]; 606 ssize_t actual = TEMP_FAILURE_RETRY(read(fd, ddBuf, sizeof(ddBuf))); 607 if (actual != sizeof(ddBuf)) { 608 return kIoError; 609 } 610 611 const uint32_t ddSignature = get4LE(ddBuf); 612 uint16_t ddOffset = 0; 613 if (ddSignature == kDDOptSignature) { 614 ddOffset = 4; 615 } 616 617 entry->crc32 = get4LE(ddBuf + ddOffset + kDDCrc32); 618 entry->compressed_length = get4LE(ddBuf + ddOffset + kDDCompLen); 619 entry->uncompressed_length = get4LE(ddBuf + ddOffset + kDDUncompLen); 620 621 return 0; 622} 623 624// Attempts to read |len| bytes into |buf| at offset |off|. 625// 626// This method uses pread64 on platforms that support it and 627// lseek64 + read on platforms that don't. This implies that 628// callers should not rely on the |fd| offset being incremented 629// as a side effect of this call. 630static inline ssize_t ReadAtOffset(int fd, uint8_t* buf, size_t len, 631 off64_t off) { 632#ifdef HAVE_PREAD 633 return TEMP_FAILURE_RETRY(pread64(fd, buf, len, off)); 634#else 635 // The only supported platform that doesn't support pread at the moment 636 // is Windows. Only recent versions of windows support unix like forks, 637 // and even there the semantics are quite different. 
638 if (lseek64(fd, off, SEEK_SET) != off) { 639 ALOGW("Zip: failed seek to offset %lld", off); 640 return kIoError; 641 } 642 643 return TEMP_FAILURE_RETRY(read(fd, buf, len)); 644#endif // HAVE_PREAD 645} 646 647static int32_t FindEntry(const ZipArchive* archive, const int ent, 648 ZipEntry* data) { 649 const uint16_t nameLen = archive->hash_table[ent].name_length; 650 const char* name = archive->hash_table[ent].name; 651 652 // Recover the start of the central directory entry from the filename 653 // pointer. The filename is the first entry past the fixed-size data, 654 // so we can just subtract back from that. 655 const unsigned char* ptr = (const unsigned char*) name; 656 ptr -= kCDELen; 657 658 // This is the base of our mmapped region, we have to sanity check that 659 // the name that's in the hash table is a pointer to a location within 660 // this mapped region. 661 const unsigned char* base_ptr = (const unsigned char*) 662 archive->directory_map->getDataPtr(); 663 if (ptr < base_ptr || ptr > base_ptr + archive->directory_map->getDataLength()) { 664 ALOGW("Zip: Invalid entry pointer"); 665 return kInvalidOffset; 666 } 667 668 // The offset of the start of the central directory in the zipfile. 669 // We keep this lying around so that we can sanity check all our lengths 670 // and our per-file structures. 671 const off64_t cd_offset = archive->directory_offset; 672 673 // Fill out the compression method, modification time, crc32 674 // and other interesting attributes from the central directory. These 675 // will later be compared against values from the local file header. 676 data->method = get2LE(ptr + kCDEMethod); 677 data->mod_time = get4LE(ptr + kCDEModWhen); 678 data->crc32 = get4LE(ptr + kCDECRC); 679 data->compressed_length = get4LE(ptr + kCDECompLen); 680 data->uncompressed_length = get4LE(ptr + kCDEUncompLen); 681 682 // Figure out the local header offset from the central directory. 
The 683 // actual file data will begin after the local header and the name / 684 // extra comments. 685 const off64_t local_header_offset = get4LE(ptr + kCDELocalOffset); 686 if (local_header_offset + (off64_t) kLFHLen >= cd_offset) { 687 ALOGW("Zip: bad local hdr offset in zip"); 688 return kInvalidOffset; 689 } 690 691 uint8_t lfh_buf[kLFHLen]; 692 ssize_t actual = ReadAtOffset(archive->fd, lfh_buf, sizeof(lfh_buf), 693 local_header_offset); 694 if (actual != sizeof(lfh_buf)) { 695 ALOGW("Zip: failed reading lfh name from offset %lld", local_header_offset); 696 return kIoError; 697 } 698 699 if (get4LE(lfh_buf) != kLFHSignature) { 700 ALOGW("Zip: didn't find signature at start of lfh, offset=%lld", 701 local_header_offset); 702 return kInvalidOffset; 703 } 704 705 // Paranoia: Match the values specified in the local file header 706 // to those specified in the central directory. 707 const uint16_t lfhGpbFlags = get2LE(lfh_buf + kLFHGPBFlags); 708 const uint16_t lfhNameLen = get2LE(lfh_buf + kLFHNameLen); 709 const uint16_t lfhExtraLen = get2LE(lfh_buf + kLFHExtraLen); 710 711 if ((lfhGpbFlags & kGPBDDFlagMask) == 0) { 712 const uint32_t lfhCrc = get4LE(lfh_buf + kLFHCRC); 713 const uint32_t lfhCompLen = get4LE(lfh_buf + kLFHCompLen); 714 const uint32_t lfhUncompLen = get4LE(lfh_buf + kLFHUncompLen); 715 716 data->has_data_descriptor = 0; 717 if (data->compressed_length != lfhCompLen || data->uncompressed_length != lfhUncompLen 718 || data->crc32 != lfhCrc) { 719 ALOGW("Zip: size/crc32 mismatch. expected {%d, %d, %x}, was {%d, %d, %x}", 720 data->compressed_length, data->uncompressed_length, data->crc32, 721 lfhCompLen, lfhUncompLen, lfhCrc); 722 return kInconsistentInformation; 723 } 724 } else { 725 data->has_data_descriptor = 1; 726 } 727 728 // Check that the local file header name matches the declared 729 // name in the central directory. 
730 if (lfhNameLen == nameLen) { 731 const off64_t name_offset = local_header_offset + kLFHLen; 732 if (name_offset + lfhNameLen >= cd_offset) { 733 ALOGW("Zip: Invalid declared length"); 734 return kInvalidOffset; 735 } 736 737 uint8_t* name_buf = (uint8_t*) malloc(nameLen); 738 ssize_t actual = ReadAtOffset(archive->fd, name_buf, nameLen, 739 name_offset); 740 741 if (actual != nameLen) { 742 ALOGW("Zip: failed reading lfh name from offset %lld", name_offset); 743 free(name_buf); 744 return kIoError; 745 } 746 747 if (memcmp(name, name_buf, nameLen)) { 748 free(name_buf); 749 return kInconsistentInformation; 750 } 751 752 free(name_buf); 753 } else { 754 ALOGW("Zip: lfh name did not match central directory."); 755 return kInconsistentInformation; 756 } 757 758 const off64_t data_offset = local_header_offset + kLFHLen + lfhNameLen + lfhExtraLen; 759 if (data_offset >= cd_offset) { 760 ALOGW("Zip: bad data offset %lld in zip", (off64_t) data_offset); 761 return kInvalidOffset; 762 } 763 764 if ((off64_t)(data_offset + data->compressed_length) > cd_offset) { 765 ALOGW("Zip: bad compressed length in zip (%lld + %zd > %lld)", 766 data_offset, data->compressed_length, cd_offset); 767 return kInvalidOffset; 768 } 769 770 if (data->method == kCompressStored && 771 (off64_t)(data_offset + data->uncompressed_length) > cd_offset) { 772 ALOGW("Zip: bad uncompressed length in zip (%lld + %zd > %lld)", 773 data_offset, data->uncompressed_length, cd_offset); 774 return kInvalidOffset; 775 } 776 777 data->offset = data_offset; 778 return 0; 779} 780 781struct IterationHandle { 782 uint32_t position; 783 const char* prefix; 784 uint16_t prefix_len; 785 ZipArchive* archive; 786}; 787 788int32_t StartIteration(ZipArchiveHandle handle, void** cookie_ptr, const char* prefix) { 789 ZipArchive* archive = (ZipArchive *) handle; 790 791 if (archive == NULL || archive->hash_table == NULL) { 792 ALOGW("Zip: Invalid ZipArchiveHandle"); 793 return kInvalidHandle; 794 } 795 796 
IterationHandle* cookie = (IterationHandle*) malloc(sizeof(IterationHandle)); 797 cookie->position = 0; 798 cookie->prefix = prefix; 799 cookie->archive = archive; 800 if (prefix != NULL) { 801 cookie->prefix_len = strlen(prefix); 802 } 803 804 *cookie_ptr = cookie ; 805 return 0; 806} 807 808int32_t FindEntry(const ZipArchiveHandle handle, const char* entryName, 809 ZipEntry* data) { 810 const ZipArchive* archive = (ZipArchive*) handle; 811 const int nameLen = strlen(entryName); 812 if (nameLen == 0 || nameLen > 65535) { 813 ALOGW("Zip: Invalid filename %s", entryName); 814 return kInvalidEntryName; 815 } 816 817 const int64_t ent = EntryToIndex(archive->hash_table, 818 archive->hash_table_size, entryName, nameLen); 819 820 if (ent < 0) { 821 ALOGW("Zip: Could not find entry %.*s", nameLen, entryName); 822 return ent; 823 } 824 825 return FindEntry(archive, ent, data); 826} 827 828int32_t Next(void* cookie, ZipEntry* data, ZipEntryName* name) { 829 IterationHandle* handle = (IterationHandle *) cookie; 830 if (handle == NULL) { 831 return kInvalidHandle; 832 } 833 834 ZipArchive* archive = handle->archive; 835 if (archive == NULL || archive->hash_table == NULL) { 836 ALOGW("Zip: Invalid ZipArchiveHandle"); 837 return kInvalidHandle; 838 } 839 840 const uint32_t currentOffset = handle->position; 841 const uint32_t hash_table_length = archive->hash_table_size; 842 const ZipEntryName *hash_table = archive->hash_table; 843 844 for (uint32_t i = currentOffset; i < hash_table_length; ++i) { 845 if (hash_table[i].name != NULL && 846 (handle->prefix == NULL || 847 (memcmp(handle->prefix, hash_table[i].name, handle->prefix_len) == 0))) { 848 handle->position = (i + 1); 849 const int error = FindEntry(archive, i, data); 850 if (!error) { 851 name->name = hash_table[i].name; 852 name->name_length = hash_table[i].name_length; 853 } 854 855 return error; 856 } 857 } 858 859 handle->position = 0; 860 return kIterationEnd; 861} 862 863static int32_t InflateToFile(int fd, const 
ZipEntry* entry, 864 uint8_t* begin, uint32_t length, 865 uint64_t* crc_out) { 866 int32_t result = -1; 867 const uint32_t kBufSize = 32768; 868 uint8_t read_buf[kBufSize]; 869 uint8_t write_buf[kBufSize]; 870 z_stream zstream; 871 int zerr; 872 873 /* 874 * Initialize the zlib stream struct. 875 */ 876 memset(&zstream, 0, sizeof(zstream)); 877 zstream.zalloc = Z_NULL; 878 zstream.zfree = Z_NULL; 879 zstream.opaque = Z_NULL; 880 zstream.next_in = NULL; 881 zstream.avail_in = 0; 882 zstream.next_out = (Bytef*) write_buf; 883 zstream.avail_out = kBufSize; 884 zstream.data_type = Z_UNKNOWN; 885 886 /* 887 * Use the undocumented "negative window bits" feature to tell zlib 888 * that there's no zlib header waiting for it. 889 */ 890 zerr = inflateInit2(&zstream, -MAX_WBITS); 891 if (zerr != Z_OK) { 892 if (zerr == Z_VERSION_ERROR) { 893 ALOGE("Installed zlib is not compatible with linked version (%s)", 894 ZLIB_VERSION); 895 } else { 896 ALOGW("Call to inflateInit2 failed (zerr=%d)", zerr); 897 } 898 899 return kZlibError; 900 } 901 902 const uint32_t uncompressed_length = entry->uncompressed_length; 903 904 uint32_t compressed_length = entry->compressed_length; 905 uint32_t write_count = 0; 906 do { 907 /* read as much as we can */ 908 if (zstream.avail_in == 0) { 909 const ssize_t getSize = (compressed_length > kBufSize) ? 
kBufSize : compressed_length; 910 const ssize_t actual = TEMP_FAILURE_RETRY(read(fd, read_buf, getSize)); 911 if (actual != getSize) { 912 ALOGW("Zip: inflate read failed (%d vs %zd)", actual, getSize); 913 result = kIoError; 914 goto z_bail; 915 } 916 917 compressed_length -= getSize; 918 919 zstream.next_in = read_buf; 920 zstream.avail_in = getSize; 921 } 922 923 /* uncompress the data */ 924 zerr = inflate(&zstream, Z_NO_FLUSH); 925 if (zerr != Z_OK && zerr != Z_STREAM_END) { 926 ALOGW("Zip: inflate zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)", 927 zerr, zstream.next_in, zstream.avail_in, 928 zstream.next_out, zstream.avail_out); 929 result = kZlibError; 930 goto z_bail; 931 } 932 933 /* write when we're full or when we're done */ 934 if (zstream.avail_out == 0 || 935 (zerr == Z_STREAM_END && zstream.avail_out != kBufSize)) { 936 const size_t write_size = zstream.next_out - write_buf; 937 // The file might have declared a bogus length. 938 if (write_size + write_count > length) { 939 goto z_bail; 940 } 941 memcpy(begin + write_count, write_buf, write_size); 942 write_count += write_size; 943 944 zstream.next_out = write_buf; 945 zstream.avail_out = kBufSize; 946 } 947 } while (zerr == Z_OK); 948 949 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 950 951 // stream.adler holds the crc32 value for such streams. 
952 *crc_out = zstream.adler; 953 954 if (zstream.total_out != uncompressed_length || compressed_length != 0) { 955 ALOGW("Zip: size mismatch on inflated file (%ld vs %zd)", 956 zstream.total_out, uncompressed_length); 957 result = kInconsistentInformation; 958 goto z_bail; 959 } 960 961 result = 0; 962 963z_bail: 964 inflateEnd(&zstream); /* free up any allocated structures */ 965 966 return result; 967} 968 969int32_t ExtractToMemory(ZipArchiveHandle handle, 970 ZipEntry* entry, uint8_t* begin, uint32_t size) { 971 ZipArchive* archive = (ZipArchive*) handle; 972 const uint16_t method = entry->method; 973 off64_t data_offset = entry->offset; 974 975 if (lseek64(archive->fd, data_offset, SEEK_SET) != data_offset) { 976 ALOGW("Zip: lseek to data at %lld failed", (off64_t) data_offset); 977 return kIoError; 978 } 979 980 // this should default to kUnknownCompressionMethod. 981 int32_t return_value = -1; 982 uint64_t crc = 0; 983 if (method == kCompressStored) { 984 return_value = CopyFileToFile(archive->fd, begin, size, &crc); 985 } else if (method == kCompressDeflated) { 986 return_value = InflateToFile(archive->fd, entry, begin, size, &crc); 987 } 988 989 if (!return_value && entry->has_data_descriptor) { 990 return_value = UpdateEntryFromDataDescriptor(archive->fd, entry); 991 if (return_value) { 992 return return_value; 993 } 994 } 995 996 // TODO: Fix this check by passing the right flags to inflate2 so that 997 // it calculates the CRC for us. 
998 if (entry->crc32 != crc && false) { 999 ALOGW("Zip: crc mismatch: expected %u, was %llu", entry->crc32, crc); 1000 return kInconsistentInformation; 1001 } 1002 1003 return return_value; 1004} 1005 1006int32_t ExtractEntryToFile(ZipArchiveHandle handle, 1007 ZipEntry* entry, int fd) { 1008 const int32_t declared_length = entry->uncompressed_length; 1009 1010 int result = TEMP_FAILURE_RETRY(ftruncate(fd, declared_length)); 1011 if (result == -1) { 1012 ALOGW("Zip: unable to truncate file to %ud", declared_length); 1013 return kIoError; 1014 } 1015 1016 android::FileMap* map = MapFileSegment(fd, 0, declared_length, 1017 false, kTempMappingFileName); 1018 if (map == NULL) { 1019 return kMmapFailed; 1020 } 1021 1022 const int32_t error = ExtractToMemory(handle, entry, 1023 reinterpret_cast<uint8_t*>(map->getDataPtr()), 1024 map->getDataLength()); 1025 map->release(); 1026 return error; 1027} 1028 1029const char* ErrorCodeString(int32_t error_code) { 1030 if (error_code > kErrorMessageLowerBound && error_code < kErrorMessageUpperBound) { 1031 return kErrorMessages[error_code * -1]; 1032 } 1033 1034 return kErrorMessages[0]; 1035} 1036 1037int GetFileDescriptor(const ZipArchiveHandle handle) { 1038 return ((ZipArchive*) handle)->fd; 1039} 1040 1041