ZipFileRO.cpp revision 7c1b96a165f970a09ed239bb4fb3f1b0d8f2a407
1/* 2 * Copyright (C) 2007 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17// 18// Read-only access to Zip archives, with minimal heap allocation. 19// 20#define LOG_TAG "zipro" 21//#define LOG_NDEBUG 0 22#include "utils/ZipFileRO.h" 23#include "utils/Log.h" 24#include "utils/misc.h" 25 26#include <zlib.h> 27 28#include <string.h> 29#include <fcntl.h> 30#include <errno.h> 31#include <assert.h> 32 33using namespace android; 34 35/* 36 * Zip file constants. 37 */ 38#define kEOCDSignature 0x06054b50 39#define kEOCDLen 22 40#define kEOCDNumEntries 8 // offset to #of entries in file 41#define kEOCDFileOffset 16 // offset to central directory 42 43#define kMaxCommentLen 65535 // longest possible in ushort 44#define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen) 45 46#define kLFHSignature 0x04034b50 47#define kLFHLen 30 // excluding variable-len fields 48#define kLFHNameLen 26 // offset to filename length 49#define kLFHExtraLen 28 // offset to extra length 50 51#define kCDESignature 0x02014b50 52#define kCDELen 46 // excluding variable-len fields 53#define kCDEMethod 10 // offset to compression method 54#define kCDEModWhen 12 // offset to modification timestamp 55#define kCDECRC 16 // offset to entry CRC 56#define kCDECompLen 20 // offset to compressed length 57#define kCDEUncompLen 24 // offset to uncompressed length 58#define kCDENameLen 28 // offset to filename length 59#define kCDEExtraLen 30 // offset to extra length 60#define kCDECommentLen 32 // offset to comment length 61#define kCDELocalOffset 42 // offset to local hdr 62 63/* 64 * The values we return for ZipEntryRO use 0 as an invalid value, so we 65 * want to adjust the hash table index by a fixed amount. Using a large 66 * value helps insure that people don't mix & match arguments, e.g. to 67 * findEntryByIndex(). 68 */ 69#define kZipEntryAdj 10000 70 71/* 72 * Convert a ZipEntryRO to a hash table index, verifying that it's in a 73 * valid range. 74 */ 75int ZipFileRO::entryToIndex(const ZipEntryRO entry) const 76{ 77 long ent = ((long) entry) - kZipEntryAdj; 78 if (ent < 0 || ent >= mHashTableSize || mHashTable[ent].name == NULL) { 79 LOGW("Invalid ZipEntryRO %p (%ld)\n", entry, ent); 80 return -1; 81 } 82 return ent; 83} 84 85 86/* 87 * Open the specified file read-only. We memory-map the entire thing and 88 * close the file before returning. 89 */ 90status_t ZipFileRO::open(const char* zipFileName) 91{ 92 int fd = -1; 93 off_t length; 94 95 assert(mFileMap == NULL); 96 97 /* 98 * Open and map the specified file. 99 */ 100 fd = ::open(zipFileName, O_RDONLY); 101 if (fd < 0) { 102 LOGW("Unable to open zip '%s': %s\n", zipFileName, strerror(errno)); 103 return NAME_NOT_FOUND; 104 } 105 106 length = lseek(fd, 0, SEEK_END); 107 if (length < 0) { 108 close(fd); 109 return UNKNOWN_ERROR; 110 } 111 112 mFileMap = new FileMap(); 113 if (mFileMap == NULL) { 114 close(fd); 115 return NO_MEMORY; 116 } 117 if (!mFileMap->create(zipFileName, fd, 0, length, true)) { 118 LOGW("Unable to map '%s': %s\n", zipFileName, strerror(errno)); 119 close(fd); 120 return UNKNOWN_ERROR; 121 } 122 123 mFd = fd; 124 125 /* 126 * Got it mapped, verify it and create data structures for fast access. 127 */ 128 if (!parseZipArchive()) { 129 mFileMap->release(); 130 mFileMap = NULL; 131 return UNKNOWN_ERROR; 132 } 133 134 return OK; 135} 136 137/* 138 * Parse the Zip archive, verifying its contents and initializing internal 139 * data structures. 140 */ 141bool ZipFileRO::parseZipArchive(void) 142{ 143#define CHECK_OFFSET(_off) { \ 144 if ((unsigned int) (_off) >= maxOffset) { \ 145 LOGE("ERROR: bad offset %u (max %d): %s\n", \ 146 (unsigned int) (_off), maxOffset, #_off); \ 147 goto bail; \ 148 } \ 149 } 150 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); 151 const unsigned char* ptr; 152 size_t length = mFileMap->getDataLength(); 153 bool result = false; 154 unsigned int i, numEntries, cdOffset; 155 unsigned int val; 156 157 /* 158 * The first 4 bytes of the file will either be the local header 159 * signature for the first file (kLFHSignature) or, if the archive doesn't 160 * have any files in it, the end-of-central-directory signature 161 * (kEOCDSignature). 162 */ 163 val = get4LE(basePtr); 164 if (val == kEOCDSignature) { 165 LOGI("Found Zip archive, but it looks empty\n"); 166 goto bail; 167 } else if (val != kLFHSignature) { 168 LOGV("Not a Zip archive (found 0x%08x)\n", val); 169 goto bail; 170 } 171 172 /* 173 * Find the EOCD. We'll find it immediately unless they have a file 174 * comment. 175 */ 176 ptr = basePtr + length - kEOCDLen; 177 178 while (ptr >= basePtr) { 179 if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature) 180 break; 181 ptr--; 182 } 183 if (ptr < basePtr) { 184 LOGI("Could not find end-of-central-directory in Zip\n"); 185 goto bail; 186 } 187 188 /* 189 * There are two interesting items in the EOCD block: the number of 190 * entries in the file, and the file offset of the start of the 191 * central directory. 192 * 193 * (There's actually a count of the #of entries in this file, and for 194 * all files which comprise a spanned archive, but for our purposes 195 * we're only interested in the current file. Besides, we expect the 196 * two to be equivalent for our stuff.) 197 */ 198 numEntries = get2LE(ptr + kEOCDNumEntries); 199 cdOffset = get4LE(ptr + kEOCDFileOffset); 200 201 /* valid offsets are [0,EOCD] */ 202 unsigned int maxOffset; 203 maxOffset = (ptr - basePtr) +1; 204 205 LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset); 206 if (numEntries == 0 || cdOffset >= length) { 207 LOGW("Invalid entries=%d offset=%d (len=%zd)\n", 208 numEntries, cdOffset, length); 209 goto bail; 210 } 211 212 /* 213 * Create hash table. We have a minimum 75% load factor, possibly as 214 * low as 50% after we round off to a power of 2. 215 */ 216 mNumEntries = numEntries; 217 mHashTableSize = roundUpPower2(1 + ((numEntries * 4) / 3)); 218 mHashTable = (HashEntry*) calloc(1, sizeof(HashEntry) * mHashTableSize); 219 220 /* 221 * Walk through the central directory, adding entries to the hash 222 * table. 223 */ 224 ptr = basePtr + cdOffset; 225 for (i = 0; i < numEntries; i++) { 226 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; 227 const unsigned char* localHdr; 228 unsigned int hash; 229 230 if (get4LE(ptr) != kCDESignature) { 231 LOGW("Missed a central dir sig (at %d)\n", i); 232 goto bail; 233 } 234 if (ptr + kCDELen > basePtr + length) { 235 LOGW("Ran off the end (at %d)\n", i); 236 goto bail; 237 } 238 239 localHdrOffset = get4LE(ptr + kCDELocalOffset); 240 CHECK_OFFSET(localHdrOffset); 241 fileNameLen = get2LE(ptr + kCDENameLen); 242 extraLen = get2LE(ptr + kCDEExtraLen); 243 commentLen = get2LE(ptr + kCDECommentLen); 244 245 //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n", 246 // i, localHdrOffset, fileNameLen, extraLen, commentLen); 247 //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen); 248 249 /* add the CDE filename to the hash table */ 250 hash = computeHash((const char*)ptr + kCDELen, fileNameLen); 251 addToHash((const char*)ptr + kCDELen, fileNameLen, hash); 252 253 localHdr = basePtr + localHdrOffset; 254 if (get4LE(localHdr) != kLFHSignature) { 255 LOGW("Bad offset to local header: %d (at %d)\n", 256 localHdrOffset, i); 257 goto bail; 258 } 259 260 ptr += kCDELen + fileNameLen + extraLen + commentLen; 261 CHECK_OFFSET(ptr - basePtr); 262 } 263 264 result = true; 265 266bail: 267 return result; 268#undef CHECK_OFFSET 269} 270 271 272/* 273 * Simple string hash function for non-null-terminated strings. 274 */ 275/*static*/ unsigned int ZipFileRO::computeHash(const char* str, int len) 276{ 277 unsigned int hash = 0; 278 279 while (len--) 280 hash = hash * 31 + *str++; 281 282 return hash; 283} 284 285/* 286 * Add a new entry to the hash table. 287 */ 288void ZipFileRO::addToHash(const char* str, int strLen, unsigned int hash) 289{ 290 int ent = hash & (mHashTableSize-1); 291 292 /* 293 * We over-allocate the table, so we're guaranteed to find an empty slot. 294 */ 295 while (mHashTable[ent].name != NULL) 296 ent = (ent + 1) & (mHashTableSize-1); 297 298 mHashTable[ent].name = str; 299 mHashTable[ent].nameLen = strLen; 300} 301 302/* 303 * Find a matching entry. 304 * 305 * Returns 0 if not found. 306 */ 307ZipEntryRO ZipFileRO::findEntryByName(const char* fileName) const 308{ 309 int nameLen = strlen(fileName); 310 unsigned int hash = computeHash(fileName, nameLen); 311 int ent = hash & (mHashTableSize-1); 312 313 while (mHashTable[ent].name != NULL) { 314 if (mHashTable[ent].nameLen == nameLen && 315 memcmp(mHashTable[ent].name, fileName, nameLen) == 0) 316 { 317 /* match */ 318 return (ZipEntryRO) (ent + kZipEntryAdj); 319 } 320 321 ent = (ent + 1) & (mHashTableSize-1); 322 } 323 324 return NULL; 325} 326 327/* 328 * Find the Nth entry. 329 * 330 * This currently involves walking through the sparse hash table, counting 331 * non-empty entries. If we need to speed this up we can either allocate 332 * a parallel lookup table or (perhaps better) provide an iterator interface. 333 */ 334ZipEntryRO ZipFileRO::findEntryByIndex(int idx) const 335{ 336 if (idx < 0 || idx >= mNumEntries) { 337 LOGW("Invalid index %d\n", idx); 338 return NULL; 339 } 340 341 for (int ent = 0; ent < mHashTableSize; ent++) { 342 if (mHashTable[ent].name != NULL) { 343 if (idx-- == 0) 344 return (ZipEntryRO) (ent + kZipEntryAdj); 345 } 346 } 347 348 return NULL; 349} 350 351/* 352 * Get the useful fields from the zip entry. 353 * 354 * Returns "false" if the offsets to the fields or the contents of the fields 355 * appear to be bogus. 356 */ 357bool ZipFileRO::getEntryInfo(ZipEntryRO entry, int* pMethod, long* pUncompLen, 358 long* pCompLen, off_t* pOffset, long* pModWhen, long* pCrc32) const 359{ 360 int ent = entryToIndex(entry); 361 if (ent < 0) 362 return false; 363 364 /* 365 * Recover the start of the central directory entry from the filename 366 * pointer. 367 */ 368 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); 369 const unsigned char* ptr = (const unsigned char*) mHashTable[ent].name; 370 size_t zipLength = mFileMap->getDataLength(); 371 372 ptr -= kCDELen; 373 374 int method = get2LE(ptr + kCDEMethod); 375 if (pMethod != NULL) 376 *pMethod = method; 377 378 if (pModWhen != NULL) 379 *pModWhen = get4LE(ptr + kCDEModWhen); 380 if (pCrc32 != NULL) 381 *pCrc32 = get4LE(ptr + kCDECRC); 382 383 /* 384 * We need to make sure that the lengths are not so large that somebody 385 * trying to map the compressed or uncompressed data runs off the end 386 * of the mapped region. 387 */ 388 unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset); 389 if (localHdrOffset + kLFHLen >= zipLength) { 390 LOGE("ERROR: bad local hdr offset in zip\n"); 391 return false; 392 } 393 const unsigned char* localHdr = basePtr + localHdrOffset; 394 off_t dataOffset = localHdrOffset + kLFHLen 395 + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen); 396 if ((unsigned long) dataOffset >= zipLength) { 397 LOGE("ERROR: bad data offset in zip\n"); 398 return false; 399 } 400 401 if (pCompLen != NULL) { 402 *pCompLen = get4LE(ptr + kCDECompLen); 403 if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) { 404 LOGE("ERROR: bad compressed length in zip\n"); 405 return false; 406 } 407 } 408 if (pUncompLen != NULL) { 409 *pUncompLen = get4LE(ptr + kCDEUncompLen); 410 if (*pUncompLen < 0) { 411 LOGE("ERROR: negative uncompressed length in zip\n"); 412 return false; 413 } 414 if (method == kCompressStored && 415 (size_t)(dataOffset + *pUncompLen) >= zipLength) 416 { 417 LOGE("ERROR: bad uncompressed length in zip\n"); 418 return false; 419 } 420 } 421 422 if (pOffset != NULL) { 423 *pOffset = dataOffset; 424 } 425 return true; 426} 427 428/* 429 * Copy the entry's filename to the buffer. 430 */ 431int ZipFileRO::getEntryFileName(ZipEntryRO entry, char* buffer, int bufLen) 432 const 433{ 434 int ent = entryToIndex(entry); 435 if (ent < 0) 436 return -1; 437 438 int nameLen = mHashTable[ent].nameLen; 439 if (bufLen < nameLen+1) 440 return nameLen+1; 441 442 memcpy(buffer, mHashTable[ent].name, nameLen); 443 buffer[nameLen] = '\0'; 444 return 0; 445} 446 447/* 448 * Create a new FileMap object that spans the data in "entry". 449 */ 450FileMap* ZipFileRO::createEntryFileMap(ZipEntryRO entry) const 451{ 452 /* 453 * TODO: the efficient way to do this is to modify FileMap to allow 454 * sub-regions of a file to be mapped. A reference-counting scheme 455 * can manage the base memory mapping. For now, we just create a brand 456 * new mapping off of the Zip archive file descriptor. 457 */ 458 459 FileMap* newMap; 460 long compLen; 461 off_t offset; 462 463 if (!getEntryInfo(entry, NULL, NULL, &compLen, &offset, NULL, NULL)) 464 return NULL; 465 466 newMap = new FileMap(); 467 if (!newMap->create(mFileMap->getFileName(), mFd, offset, compLen, true)) { 468 newMap->release(); 469 return NULL; 470 } 471 472 return newMap; 473} 474 475/* 476 * Uncompress an entry, in its entirety, into the provided output buffer. 477 * 478 * This doesn't verify the data's CRC, which might be useful for 479 * uncompressed data. The caller should be able to manage it. 480 */ 481bool ZipFileRO::uncompressEntry(ZipEntryRO entry, void* buffer) const 482{ 483 const int kSequentialMin = 32768; 484 bool result = false; 485 int ent = entryToIndex(entry); 486 if (ent < 0) 487 return -1; 488 489 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); 490 int method; 491 long uncompLen, compLen; 492 off_t offset; 493 494 getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL); 495 496 /* 497 * Experiment with madvise hint. When we want to uncompress a file, 498 * we pull some stuff out of the central dir entry and then hit a 499 * bunch of compressed or uncompressed data sequentially. The CDE 500 * visit will cause a limited amount of read-ahead because it's at 501 * the end of the file. We could end up doing lots of extra disk 502 * access if the file we're prying open is small. Bottom line is we 503 * probably don't want to turn MADV_SEQUENTIAL on and leave it on. 504 * 505 * So, if the compressed size of the file is above a certain minimum 506 * size, temporarily boost the read-ahead in the hope that the extra 507 * pair of system calls are negated by a reduction in page faults. 508 */ 509 if (compLen > kSequentialMin) 510 mFileMap->advise(FileMap::SEQUENTIAL); 511 512 if (method == kCompressStored) { 513 memcpy(buffer, basePtr + offset, uncompLen); 514 } else { 515 if (!inflateBuffer(buffer, basePtr + offset, uncompLen, compLen)) 516 goto bail; 517 } 518 519 if (compLen > kSequentialMin) 520 mFileMap->advise(FileMap::NORMAL); 521 522 result = true; 523 524bail: 525 return result; 526} 527 528/* 529 * Uncompress an entry, in its entirety, to an open file descriptor. 530 * 531 * This doesn't verify the data's CRC, but probably should. 532 */ 533bool ZipFileRO::uncompressEntry(ZipEntryRO entry, int fd) const 534{ 535 bool result = false; 536 int ent = entryToIndex(entry); 537 if (ent < 0) 538 return -1; 539 540 const unsigned char* basePtr = (const unsigned char*)mFileMap->getDataPtr(); 541 int method; 542 long uncompLen, compLen; 543 off_t offset; 544 545 getEntryInfo(entry, &method, &uncompLen, &compLen, &offset, NULL, NULL); 546 547 if (method == kCompressStored) { 548 ssize_t actual; 549 550 actual = write(fd, basePtr + offset, uncompLen); 551 if (actual < 0) { 552 LOGE("Write failed: %s\n", strerror(errno)); 553 goto bail; 554 } else if (actual != uncompLen) { 555 LOGE("Partial write during uncompress (%d of %ld)\n", 556 (int)actual, uncompLen); 557 goto bail; 558 } else { 559 LOGI("+++ successful write\n"); 560 } 561 } else { 562 if (!inflateBuffer(fd, basePtr+offset, uncompLen, compLen)) 563 goto bail; 564 } 565 566 result = true; 567 568bail: 569 return result; 570} 571 572/* 573 * Uncompress "deflate" data from one buffer to another. 574 */ 575/*static*/ bool ZipFileRO::inflateBuffer(void* outBuf, const void* inBuf, 576 long uncompLen, long compLen) 577{ 578 bool result = false; 579 z_stream zstream; 580 int zerr; 581 582 /* 583 * Initialize the zlib stream struct. 584 */ 585 memset(&zstream, 0, sizeof(zstream)); 586 zstream.zalloc = Z_NULL; 587 zstream.zfree = Z_NULL; 588 zstream.opaque = Z_NULL; 589 zstream.next_in = (Bytef*)inBuf; 590 zstream.avail_in = compLen; 591 zstream.next_out = (Bytef*) outBuf; 592 zstream.avail_out = uncompLen; 593 zstream.data_type = Z_UNKNOWN; 594 595 /* 596 * Use the undocumented "negative window bits" feature to tell zlib 597 * that there's no zlib header waiting for it. 598 */ 599 zerr = inflateInit2(&zstream, -MAX_WBITS); 600 if (zerr != Z_OK) { 601 if (zerr == Z_VERSION_ERROR) { 602 LOGE("Installed zlib is not compatible with linked version (%s)\n", 603 ZLIB_VERSION); 604 } else { 605 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); 606 } 607 goto bail; 608 } 609 610 /* 611 * Expand data. 612 */ 613 zerr = inflate(&zstream, Z_FINISH); 614 if (zerr != Z_STREAM_END) { 615 LOGW("Zip inflate failed, zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", 616 zerr, zstream.next_in, zstream.avail_in, 617 zstream.next_out, zstream.avail_out); 618 goto z_bail; 619 } 620 621 /* paranoia */ 622 if ((long) zstream.total_out != uncompLen) { 623 LOGW("Size mismatch on inflated file (%ld vs %ld)\n", 624 zstream.total_out, uncompLen); 625 goto z_bail; 626 } 627 628 result = true; 629 630z_bail: 631 inflateEnd(&zstream); /* free up any allocated structures */ 632 633bail: 634 return result; 635} 636 637/* 638 * Uncompress "deflate" data from one buffer to an open file descriptor. 639 */ 640/*static*/ bool ZipFileRO::inflateBuffer(int fd, const void* inBuf, 641 long uncompLen, long compLen) 642{ 643 bool result = false; 644 const int kWriteBufSize = 32768; 645 unsigned char writeBuf[kWriteBufSize]; 646 z_stream zstream; 647 int zerr; 648 649 /* 650 * Initialize the zlib stream struct. 651 */ 652 memset(&zstream, 0, sizeof(zstream)); 653 zstream.zalloc = Z_NULL; 654 zstream.zfree = Z_NULL; 655 zstream.opaque = Z_NULL; 656 zstream.next_in = (Bytef*)inBuf; 657 zstream.avail_in = compLen; 658 zstream.next_out = (Bytef*) writeBuf; 659 zstream.avail_out = sizeof(writeBuf); 660 zstream.data_type = Z_UNKNOWN; 661 662 /* 663 * Use the undocumented "negative window bits" feature to tell zlib 664 * that there's no zlib header waiting for it. 665 */ 666 zerr = inflateInit2(&zstream, -MAX_WBITS); 667 if (zerr != Z_OK) { 668 if (zerr == Z_VERSION_ERROR) { 669 LOGE("Installed zlib is not compatible with linked version (%s)\n", 670 ZLIB_VERSION); 671 } else { 672 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); 673 } 674 goto bail; 675 } 676 677 /* 678 * Loop while we have more to do. 679 */ 680 do { 681 /* 682 * Expand data. 683 */ 684 zerr = inflate(&zstream, Z_NO_FLUSH); 685 if (zerr != Z_OK && zerr != Z_STREAM_END) { 686 LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", 687 zerr, zstream.next_in, zstream.avail_in, 688 zstream.next_out, zstream.avail_out); 689 goto z_bail; 690 } 691 692 /* write when we're full or when we're done */ 693 if (zstream.avail_out == 0 || 694 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf))) 695 { 696 long writeSize = zstream.next_out - writeBuf; 697 int cc = write(fd, writeBuf, writeSize); 698 if (cc != (int) writeSize) { 699 LOGW("write failed in inflate (%d vs %ld)\n", cc, writeSize); 700 goto z_bail; 701 } 702 703 zstream.next_out = writeBuf; 704 zstream.avail_out = sizeof(writeBuf); 705 } 706 } while (zerr == Z_OK); 707 708 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 709 710 /* paranoia */ 711 if ((long) zstream.total_out != uncompLen) { 712 LOGW("Size mismatch on inflated file (%ld vs %ld)\n", 713 zstream.total_out, uncompLen); 714 goto z_bail; 715 } 716 717 result = true; 718 719z_bail: 720 inflateEnd(&zstream); /* free up any allocated structures */ 721 722bail: 723 return result; 724} 725