/* Zip.c revision 596271fa71d79e3eec03c7cf6ac76cb026dd8578 */
1/* 2 * Copyright 2006 The Android Open Source Project 3 * 4 * Simple Zip file support. 5 */ 6#include "safe_iop.h" 7#include "zlib.h" 8 9#include <errno.h> 10#include <fcntl.h> 11#include <limits.h> 12#include <stdint.h> // for uintptr_t 13#include <stdlib.h> 14#include <sys/stat.h> // for S_ISLNK() 15#include <unistd.h> 16 17#define LOG_TAG "minzip" 18#include "Zip.h" 19#include "Bits.h" 20#include "Log.h" 21#include "DirUtil.h" 22 23#undef NDEBUG // do this after including Log.h 24#include <assert.h> 25 26#define SORT_ENTRIES 1 27 28/* 29 * Offset and length constants (java.util.zip naming convention). 30 */ 31enum { 32 CENSIG = 0x02014b50, // PK12 33 CENHDR = 46, 34 35 CENVEM = 4, 36 CENVER = 6, 37 CENFLG = 8, 38 CENHOW = 10, 39 CENTIM = 12, 40 CENCRC = 16, 41 CENSIZ = 20, 42 CENLEN = 24, 43 CENNAM = 28, 44 CENEXT = 30, 45 CENCOM = 32, 46 CENDSK = 34, 47 CENATT = 36, 48 CENATX = 38, 49 CENOFF = 42, 50 51 ENDSIG = 0x06054b50, // PK56 52 ENDHDR = 22, 53 54 ENDSUB = 8, 55 ENDTOT = 10, 56 ENDSIZ = 12, 57 ENDOFF = 16, 58 ENDCOM = 20, 59 60 EXTSIG = 0x08074b50, // PK78 61 EXTHDR = 16, 62 63 EXTCRC = 4, 64 EXTSIZ = 8, 65 EXTLEN = 12, 66 67 LOCSIG = 0x04034b50, // PK34 68 LOCHDR = 30, 69 70 LOCVER = 4, 71 LOCFLG = 6, 72 LOCHOW = 8, 73 LOCTIM = 10, 74 LOCCRC = 14, 75 LOCSIZ = 18, 76 LOCLEN = 22, 77 LOCNAM = 26, 78 LOCEXT = 28, 79 80 STORED = 0, 81 DEFLATED = 8, 82 83 CENVEM_UNIX = 3 << 8, // the high byte of CENVEM 84}; 85 86 87/* 88 * For debugging, dump the contents of a ZipEntry. 89 */ 90#if 0 91static void dumpEntry(const ZipEntry* pEntry) 92{ 93 LOGI(" %p '%.*s'\n", pEntry->fileName,pEntry->fileNameLen,pEntry->fileName); 94 LOGI(" off=%ld comp=%ld uncomp=%ld how=%d\n", pEntry->offset, 95 pEntry->compLen, pEntry->uncompLen, pEntry->compression); 96} 97#endif 98 99/* 100 * (This is a mzHashTableLookup callback.) 101 * 102 * Compare two ZipEntry structs, by name. 
103 */ 104static int hashcmpZipEntry(const void* ventry1, const void* ventry2) 105{ 106 const ZipEntry* entry1 = (const ZipEntry*) ventry1; 107 const ZipEntry* entry2 = (const ZipEntry*) ventry2; 108 109 if (entry1->fileNameLen != entry2->fileNameLen) 110 return entry1->fileNameLen - entry2->fileNameLen; 111 return memcmp(entry1->fileName, entry2->fileName, entry1->fileNameLen); 112} 113 114/* 115 * (This is a mzHashTableLookup callback.) 116 * 117 * find a ZipEntry struct by name. 118 */ 119static int hashcmpZipName(const void* ventry, const void* vname) 120{ 121 const ZipEntry* entry = (const ZipEntry*) ventry; 122 const char* name = (const char*) vname; 123 unsigned int nameLen = strlen(name); 124 125 if (entry->fileNameLen != nameLen) 126 return entry->fileNameLen - nameLen; 127 return memcmp(entry->fileName, name, nameLen); 128} 129 130/* 131 * Compute the hash code for a ZipEntry filename. 132 * 133 * Not expected to be compatible with any other hash function, so we init 134 * to 2 to ensure it doesn't happen to match. 135 */ 136static unsigned int computeHash(const char* name, int nameLen) 137{ 138 unsigned int hash = 2; 139 140 while (nameLen--) 141 hash = hash * 31 + *name++; 142 143 return hash; 144} 145 146static void addEntryToHashTable(HashTable* pHash, ZipEntry* pEntry) 147{ 148 unsigned int itemHash = computeHash(pEntry->fileName, pEntry->fileNameLen); 149 const ZipEntry* found; 150 151 found = (const ZipEntry*)mzHashTableLookup(pHash, 152 itemHash, pEntry, hashcmpZipEntry, true); 153 if (found != pEntry) { 154 LOGW("WARNING: duplicate entry '%.*s' in Zip\n", 155 found->fileNameLen, found->fileName); 156 /* keep going */ 157 } 158} 159 160static int validFilename(const char *fileName, unsigned int fileNameLen) 161{ 162 // Forbid super long filenames. 163 if (fileNameLen >= PATH_MAX) { 164 LOGW("Filename too long (%d chatacters)\n", fileNameLen); 165 return 0; 166 } 167 168 // Require all characters to be printable ASCII (no NUL, no UTF-8, etc). 
169 unsigned int i; 170 for (i = 0; i < fileNameLen; ++i) { 171 if (fileName[i] < 32 || fileName[i] >= 127) { 172 LOGW("Filename contains invalid character '\%03o'\n", fileName[i]); 173 return 0; 174 } 175 } 176 177 return 1; 178} 179 180/* 181 * Parse the contents of a Zip archive. After confirming that the file 182 * is in fact a Zip, we scan out the contents of the central directory and 183 * store it in a hash table. 184 * 185 * Returns "true" on success. 186 */ 187static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap) 188{ 189 bool result = false; 190 const unsigned char* ptr; 191 unsigned int i, numEntries, cdOffset; 192 unsigned int val; 193 194 /* 195 * The first 4 bytes of the file will either be the local header 196 * signature for the first file (LOCSIG) or, if the archive doesn't 197 * have any files in it, the end-of-central-directory signature (ENDSIG). 198 */ 199 val = get4LE(pMap->addr); 200 if (val == ENDSIG) { 201 LOGI("Found Zip archive, but it looks empty\n"); 202 goto bail; 203 } else if (val != LOCSIG) { 204 LOGV("Not a Zip archive (found 0x%08x)\n", val); 205 goto bail; 206 } 207 208 /* 209 * Find the EOCD. We'll find it immediately unless they have a file 210 * comment. 211 */ 212 ptr = pMap->addr + pMap->length - ENDHDR; 213 214 while (ptr >= (const unsigned char*) pMap->addr) { 215 if (*ptr == (ENDSIG & 0xff) && get4LE(ptr) == ENDSIG) 216 break; 217 ptr--; 218 } 219 if (ptr < (const unsigned char*) pMap->addr) { 220 LOGI("Could not find end-of-central-directory in Zip\n"); 221 goto bail; 222 } 223 224 /* 225 * There are two interesting items in the EOCD block: the number of 226 * entries in the file, and the file offset of the start of the 227 * central directory. 
228 */ 229 numEntries = get2LE(ptr + ENDSUB); 230 cdOffset = get4LE(ptr + ENDOFF); 231 232 LOGVV("numEntries=%d cdOffset=%d\n", numEntries, cdOffset); 233 if (numEntries == 0 || cdOffset >= pMap->length) { 234 LOGW("Invalid entries=%d offset=%d (len=%zd)\n", 235 numEntries, cdOffset, pMap->length); 236 goto bail; 237 } 238 239 /* 240 * Create data structures to hold entries. 241 */ 242 pArchive->numEntries = numEntries; 243 pArchive->pEntries = (ZipEntry*) calloc(numEntries, sizeof(ZipEntry)); 244 pArchive->pHash = mzHashTableCreate(mzHashSize(numEntries), NULL); 245 if (pArchive->pEntries == NULL || pArchive->pHash == NULL) 246 goto bail; 247 248 ptr = pMap->addr + cdOffset; 249 for (i = 0; i < numEntries; i++) { 250 ZipEntry* pEntry; 251 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; 252 const unsigned char* localHdr; 253 const char *fileName; 254 255 if (ptr + CENHDR > (const unsigned char*)pMap->addr + pMap->length) { 256 LOGW("Ran off the end (at %d)\n", i); 257 goto bail; 258 } 259 if (get4LE(ptr) != CENSIG) { 260 LOGW("Missed a central dir sig (at %d)\n", i); 261 goto bail; 262 } 263 264 localHdrOffset = get4LE(ptr + CENOFF); 265 fileNameLen = get2LE(ptr + CENNAM); 266 extraLen = get2LE(ptr + CENEXT); 267 commentLen = get2LE(ptr + CENCOM); 268 fileName = (const char*)ptr + CENHDR; 269 if (fileName + fileNameLen > (const char*)pMap->addr + pMap->length) { 270 LOGW("Filename ran off the end (at %d)\n", i); 271 goto bail; 272 } 273 if (!validFilename(fileName, fileNameLen)) { 274 LOGW("Invalid filename (at %d)\n", i); 275 goto bail; 276 } 277 278#if SORT_ENTRIES 279 /* Figure out where this entry should go (binary search). 
280 */ 281 if (i > 0) { 282 int low, high; 283 284 low = 0; 285 high = i - 1; 286 while (low <= high) { 287 int mid; 288 int diff; 289 int diffLen; 290 291 mid = low + ((high - low) / 2); // avoid overflow 292 293 if (pArchive->pEntries[mid].fileNameLen < fileNameLen) { 294 diffLen = pArchive->pEntries[mid].fileNameLen; 295 } else { 296 diffLen = fileNameLen; 297 } 298 diff = strncmp(pArchive->pEntries[mid].fileName, fileName, 299 diffLen); 300 if (diff == 0) { 301 diff = pArchive->pEntries[mid].fileNameLen - fileNameLen; 302 } 303 if (diff < 0) { 304 low = mid + 1; 305 } else if (diff > 0) { 306 high = mid - 1; 307 } else { 308 high = mid; 309 break; 310 } 311 } 312 313 unsigned int target = high + 1; 314 assert(target <= i); 315 if (target != i) { 316 /* It belongs somewhere other than at the end of 317 * the list. Make some room at [target]. 318 */ 319 memmove(pArchive->pEntries + target + 1, 320 pArchive->pEntries + target, 321 (i - target) * sizeof(ZipEntry)); 322 } 323 pEntry = &pArchive->pEntries[target]; 324 } else { 325 pEntry = &pArchive->pEntries[0]; 326 } 327#else 328 pEntry = &pArchive->pEntries[i]; 329#endif 330 331 //LOGI("%d: localHdr=%d fnl=%d el=%d cl=%d\n", 332 // i, localHdrOffset, fileNameLen, extraLen, commentLen); 333 334 pEntry->fileNameLen = fileNameLen; 335 pEntry->fileName = fileName; 336 337 pEntry->compLen = get4LE(ptr + CENSIZ); 338 pEntry->uncompLen = get4LE(ptr + CENLEN); 339 pEntry->compression = get2LE(ptr + CENHOW); 340 pEntry->modTime = get4LE(ptr + CENTIM); 341 pEntry->crc32 = get4LE(ptr + CENCRC); 342 343 /* These two are necessary for finding the mode of the file. 
344 */ 345 pEntry->versionMadeBy = get2LE(ptr + CENVEM); 346 if ((pEntry->versionMadeBy & 0xff00) != 0 && 347 (pEntry->versionMadeBy & 0xff00) != CENVEM_UNIX) 348 { 349 LOGW("Incompatible \"version made by\": 0x%02x (at %d)\n", 350 pEntry->versionMadeBy >> 8, i); 351 goto bail; 352 } 353 pEntry->externalFileAttributes = get4LE(ptr + CENATX); 354 355 // Perform pMap->addr + localHdrOffset, ensuring that it won't 356 // overflow. This is needed because localHdrOffset is untrusted. 357 if (!safe_add((uintptr_t *)&localHdr, (uintptr_t)pMap->addr, 358 (uintptr_t)localHdrOffset)) { 359 LOGW("Integer overflow adding in parseZipArchive\n"); 360 goto bail; 361 } 362 if ((uintptr_t)localHdr + LOCHDR > 363 (uintptr_t)pMap->addr + pMap->length) { 364 LOGW("Bad offset to local header: %d (at %d)\n", localHdrOffset, i); 365 goto bail; 366 } 367 if (get4LE(localHdr) != LOCSIG) { 368 LOGW("Missed a local header sig (at %d)\n", i); 369 goto bail; 370 } 371 pEntry->offset = localHdrOffset + LOCHDR 372 + get2LE(localHdr + LOCNAM) + get2LE(localHdr + LOCEXT); 373 if (!safe_add(NULL, pEntry->offset, pEntry->compLen)) { 374 LOGW("Integer overflow adding in parseZipArchive\n"); 375 goto bail; 376 } 377 if ((size_t)pEntry->offset + pEntry->compLen > pMap->length) { 378 LOGW("Data ran off the end (at %d)\n", i); 379 goto bail; 380 } 381 382#if !SORT_ENTRIES 383 /* Add to hash table; no need to lock here. 384 * Can't do this now if we're sorting, because entries 385 * will move around. 386 */ 387 addEntryToHashTable(pArchive->pHash, pEntry); 388#endif 389 390 //dumpEntry(pEntry); 391 ptr += CENHDR + fileNameLen + extraLen + commentLen; 392 } 393 394#if SORT_ENTRIES 395 /* If we're sorting, we have to wait until all entries 396 * are in their final places, otherwise the pointers will 397 * probably point to the wrong things. 398 */ 399 for (i = 0; i < numEntries; i++) { 400 /* Add to hash table; no need to lock here. 
401 */ 402 addEntryToHashTable(pArchive->pHash, &pArchive->pEntries[i]); 403 } 404#endif 405 406 result = true; 407 408bail: 409 if (!result) { 410 mzHashTableFree(pArchive->pHash); 411 pArchive->pHash = NULL; 412 } 413 return result; 414} 415 416/* 417 * Open a Zip archive and scan out the contents. 418 * 419 * The easiest way to do this is to mmap() the whole thing and do the 420 * traditional backward scan for central directory. Since the EOCD is 421 * a relatively small bit at the end, we should end up only touching a 422 * small set of pages. 423 * 424 * This will be called on non-Zip files, especially during startup, so 425 * we don't want to be too noisy about failures. (Do we want a "quiet" 426 * flag?) 427 * 428 * On success, we fill out the contents of "pArchive". 429 */ 430int mzOpenZipArchive(const char* fileName, ZipArchive* pArchive) 431{ 432 MemMapping map; 433 int err; 434 435 LOGV("Opening archive '%s' %p\n", fileName, pArchive); 436 437 map.addr = NULL; 438 memset(pArchive, 0, sizeof(*pArchive)); 439 440 pArchive->fd = open(fileName, O_RDONLY, 0); 441 if (pArchive->fd < 0) { 442 err = errno ? errno : -1; 443 LOGV("Unable to open '%s': %s\n", fileName, strerror(err)); 444 goto bail; 445 } 446 447 if (sysMapFileInShmem(pArchive->fd, &map) != 0) { 448 err = -1; 449 LOGW("Map of '%s' failed\n", fileName); 450 goto bail; 451 } 452 453 if (map.length < ENDHDR) { 454 err = -1; 455 LOGV("File '%s' too small to be zip (%zd)\n", fileName, map.length); 456 goto bail; 457 } 458 459 if (!parseZipArchive(pArchive, &map)) { 460 err = -1; 461 LOGV("Parsing '%s' failed\n", fileName); 462 goto bail; 463 } 464 465 err = 0; 466 sysCopyMap(&pArchive->map, &map); 467 map.addr = NULL; 468 469bail: 470 if (err != 0) 471 mzCloseZipArchive(pArchive); 472 if (map.addr != NULL) 473 sysReleaseShmem(&map); 474 return err; 475} 476 477/* 478 * Close a ZipArchive, closing the file and freeing the contents. 479 * 480 * NOTE: the ZipArchive may not have been fully created. 
481 */ 482void mzCloseZipArchive(ZipArchive* pArchive) 483{ 484 LOGV("Closing archive %p\n", pArchive); 485 486 if (pArchive->fd >= 0) 487 close(pArchive->fd); 488 if (pArchive->map.addr != NULL) 489 sysReleaseShmem(&pArchive->map); 490 491 free(pArchive->pEntries); 492 493 mzHashTableFree(pArchive->pHash); 494 495 pArchive->fd = -1; 496 pArchive->pHash = NULL; 497 pArchive->pEntries = NULL; 498} 499 500/* 501 * Find a matching entry. 502 * 503 * Returns NULL if no matching entry found. 504 */ 505const ZipEntry* mzFindZipEntry(const ZipArchive* pArchive, 506 const char* entryName) 507{ 508 unsigned int itemHash = computeHash(entryName, strlen(entryName)); 509 510 return (const ZipEntry*)mzHashTableLookup(pArchive->pHash, 511 itemHash, (char*) entryName, hashcmpZipName, false); 512} 513 514/* 515 * Return true if the entry is a symbolic link. 516 */ 517bool mzIsZipEntrySymlink(const ZipEntry* pEntry) 518{ 519 if ((pEntry->versionMadeBy & 0xff00) == CENVEM_UNIX) { 520 return S_ISLNK(pEntry->externalFileAttributes >> 16); 521 } 522 return false; 523} 524 525/* Call processFunction on the uncompressed data of a STORED entry. 
526 */ 527static bool processStoredEntry(const ZipArchive *pArchive, 528 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 529 void *cookie) 530{ 531 size_t bytesLeft = pEntry->compLen; 532 while (bytesLeft > 0) { 533 unsigned char buf[32 * 1024]; 534 ssize_t n; 535 size_t count; 536 bool ret; 537 538 count = bytesLeft; 539 if (count > sizeof(buf)) { 540 count = sizeof(buf); 541 } 542 n = read(pArchive->fd, buf, count); 543 if (n < 0 || (size_t)n != count) { 544 LOGE("Can't read %zu bytes from zip file: %ld\n", count, n); 545 return false; 546 } 547 ret = processFunction(buf, n, cookie); 548 if (!ret) { 549 return false; 550 } 551 bytesLeft -= count; 552 } 553 return true; 554} 555 556static bool processDeflatedEntry(const ZipArchive *pArchive, 557 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 558 void *cookie) 559{ 560 long result = -1; 561 unsigned char readBuf[32 * 1024]; 562 unsigned char procBuf[32 * 1024]; 563 z_stream zstream; 564 int zerr; 565 long compRemaining; 566 567 compRemaining = pEntry->compLen; 568 569 /* 570 * Initialize the zlib stream. 571 */ 572 memset(&zstream, 0, sizeof(zstream)); 573 zstream.zalloc = Z_NULL; 574 zstream.zfree = Z_NULL; 575 zstream.opaque = Z_NULL; 576 zstream.next_in = NULL; 577 zstream.avail_in = 0; 578 zstream.next_out = (Bytef*) procBuf; 579 zstream.avail_out = sizeof(procBuf); 580 zstream.data_type = Z_UNKNOWN; 581 582 /* 583 * Use the undocumented "negative window bits" feature to tell zlib 584 * that there's no zlib header waiting for it. 585 */ 586 zerr = inflateInit2(&zstream, -MAX_WBITS); 587 if (zerr != Z_OK) { 588 if (zerr == Z_VERSION_ERROR) { 589 LOGE("Installed zlib is not compatible with linked version (%s)\n", 590 ZLIB_VERSION); 591 } else { 592 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); 593 } 594 goto bail; 595 } 596 597 /* 598 * Loop while we have data. 
599 */ 600 do { 601 /* read as much as we can */ 602 if (zstream.avail_in == 0) { 603 long getSize = (compRemaining > (long)sizeof(readBuf)) ? 604 (long)sizeof(readBuf) : compRemaining; 605 LOGVV("+++ reading %ld bytes (%ld left)\n", 606 getSize, compRemaining); 607 608 int cc = read(pArchive->fd, readBuf, getSize); 609 if (cc != (int) getSize) { 610 LOGW("inflate read failed (%d vs %ld)\n", cc, getSize); 611 goto z_bail; 612 } 613 614 compRemaining -= getSize; 615 616 zstream.next_in = readBuf; 617 zstream.avail_in = getSize; 618 } 619 620 /* uncompress the data */ 621 zerr = inflate(&zstream, Z_NO_FLUSH); 622 if (zerr != Z_OK && zerr != Z_STREAM_END) { 623 LOGD("zlib inflate call failed (zerr=%d)\n", zerr); 624 goto z_bail; 625 } 626 627 /* write when we're full or when we're done */ 628 if (zstream.avail_out == 0 || 629 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(procBuf))) 630 { 631 long procSize = zstream.next_out - procBuf; 632 LOGVV("+++ processing %d bytes\n", (int) procSize); 633 bool ret = processFunction(procBuf, procSize, cookie); 634 if (!ret) { 635 LOGW("Process function elected to fail (in inflate)\n"); 636 goto z_bail; 637 } 638 639 zstream.next_out = procBuf; 640 zstream.avail_out = sizeof(procBuf); 641 } 642 } while (zerr == Z_OK); 643 644 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 645 646 // success! 647 result = zstream.total_out; 648 649z_bail: 650 inflateEnd(&zstream); /* free up any allocated structures */ 651 652bail: 653 if (result != pEntry->uncompLen) { 654 if (result != -1) // error already shown? 655 LOGW("Size mismatch on inflated file (%ld vs %ld)\n", 656 result, pEntry->uncompLen); 657 return false; 658 } 659 return true; 660} 661 662/* 663 * Stream the uncompressed data through the supplied function, 664 * passing cookie to it each time it gets called. processFunction 665 * may be called more than once. 
666 * 667 * If processFunction returns false, the operation is abandoned and 668 * mzProcessZipEntryContents() immediately returns false. 669 * 670 * This is useful for calculating the hash of an entry's uncompressed contents. 671 */ 672bool mzProcessZipEntryContents(const ZipArchive *pArchive, 673 const ZipEntry *pEntry, ProcessZipEntryContentsFunction processFunction, 674 void *cookie) 675{ 676 bool ret = false; 677 off_t oldOff; 678 679 /* save current offset */ 680 oldOff = lseek(pArchive->fd, 0, SEEK_CUR); 681 682 /* Seek to the beginning of the entry's compressed data. */ 683 lseek(pArchive->fd, pEntry->offset, SEEK_SET); 684 685 switch (pEntry->compression) { 686 case STORED: 687 ret = processStoredEntry(pArchive, pEntry, processFunction, cookie); 688 break; 689 case DEFLATED: 690 ret = processDeflatedEntry(pArchive, pEntry, processFunction, cookie); 691 break; 692 default: 693 LOGE("Unsupported compression type %d for entry '%s'\n", 694 pEntry->compression, pEntry->fileName); 695 break; 696 } 697 698 /* restore file offset */ 699 lseek(pArchive->fd, oldOff, SEEK_SET); 700 return ret; 701} 702 703static bool crcProcessFunction(const unsigned char *data, int dataLen, 704 void *crc) 705{ 706 *(unsigned long *)crc = crc32(*(unsigned long *)crc, data, dataLen); 707 return true; 708} 709 710/* 711 * Check the CRC on this entry; return true if it is correct. 712 * May do other internal checks as well. 
713 */ 714bool mzIsZipEntryIntact(const ZipArchive *pArchive, const ZipEntry *pEntry) 715{ 716 unsigned long crc; 717 bool ret; 718 719 crc = crc32(0L, Z_NULL, 0); 720 ret = mzProcessZipEntryContents(pArchive, pEntry, crcProcessFunction, 721 (void *)&crc); 722 if (!ret) { 723 LOGE("Can't calculate CRC for entry\n"); 724 return false; 725 } 726 if (crc != (unsigned long)pEntry->crc32) { 727 LOGW("CRC for entry %.*s (0x%08lx) != expected (0x%08lx)\n", 728 pEntry->fileNameLen, pEntry->fileName, crc, pEntry->crc32); 729 return false; 730 } 731 return true; 732} 733 734typedef struct { 735 char *buf; 736 int bufLen; 737} CopyProcessArgs; 738 739static bool copyProcessFunction(const unsigned char *data, int dataLen, 740 void *cookie) 741{ 742 CopyProcessArgs *args = (CopyProcessArgs *)cookie; 743 if (dataLen <= args->bufLen) { 744 memcpy(args->buf, data, dataLen); 745 args->buf += dataLen; 746 args->bufLen -= dataLen; 747 return true; 748 } 749 return false; 750} 751 752/* 753 * Read an entry into a buffer allocated by the caller. 754 */ 755bool mzReadZipEntry(const ZipArchive* pArchive, const ZipEntry* pEntry, 756 char *buf, int bufLen) 757{ 758 CopyProcessArgs args; 759 bool ret; 760 761 args.buf = buf; 762 args.bufLen = bufLen; 763 ret = mzProcessZipEntryContents(pArchive, pEntry, copyProcessFunction, 764 (void *)&args); 765 if (!ret) { 766 LOGE("Can't extract entry to buffer.\n"); 767 return false; 768 } 769 return true; 770} 771 772static bool writeProcessFunction(const unsigned char *data, int dataLen, 773 void *fd) 774{ 775 int zeroWrites = 0; 776 ssize_t soFar = 0; 777 do { 778 ssize_t n = write((int)fd, data+soFar, dataLen-soFar); 779 if (n < 0) { 780 LOGE("Error writing %ld bytes from zip file: %s\n", 781 dataLen-soFar, strerror(errno)); 782 return false; 783 } else if (n > 0) { 784 soFar += n; 785 if (soFar == dataLen) return true; 786 if (soFar > dataLen) { 787 LOGE("write overrun? 
(%ld bytes instead of %d)\n", 788 soFar, dataLen); 789 return false; 790 } 791 zeroWrites = 0; 792 } else { 793 ++zeroWrites; 794 } 795 } while (zeroWrites < 5); 796 LOGE("too many consecutive zero-length writes\n"); 797 return false; 798} 799 800/* 801 * Uncompress "pEntry" in "pArchive" to "fd" at the current offset. 802 */ 803bool mzExtractZipEntryToFile(const ZipArchive *pArchive, 804 const ZipEntry *pEntry, int fd) 805{ 806 bool ret = mzProcessZipEntryContents(pArchive, pEntry, writeProcessFunction, 807 (void *)fd); 808 if (!ret) { 809 LOGE("Can't extract entry to file.\n"); 810 return false; 811 } 812 return true; 813} 814 815/* Helper state to make path translation easier and less malloc-happy. 816 */ 817typedef struct { 818 const char *targetDir; 819 const char *zipDir; 820 char *buf; 821 int targetDirLen; 822 int zipDirLen; 823 int bufLen; 824} MzPathHelper; 825 826/* Given the values of targetDir and zipDir in the helper, 827 * return the target filename of the provided entry. 828 * The helper must be initialized first. 829 */ 830static const char *targetEntryPath(MzPathHelper *helper, ZipEntry *pEntry) 831{ 832 int needLen; 833 bool firstTime = (helper->buf == NULL); 834 835 /* target file <-- targetDir + / + entry[zipDirLen:] 836 */ 837 needLen = helper->targetDirLen + 1 + 838 pEntry->fileNameLen - helper->zipDirLen + 1; 839 if (needLen > helper->bufLen) { 840 char *newBuf; 841 842 needLen *= 2; 843 newBuf = (char *)realloc(helper->buf, needLen); 844 if (newBuf == NULL) { 845 return NULL; 846 } 847 helper->buf = newBuf; 848 helper->bufLen = needLen; 849 } 850 851 /* Every path will start with the target path and a slash. 
852 */ 853 if (firstTime) { 854 char *p = helper->buf; 855 memcpy(p, helper->targetDir, helper->targetDirLen); 856 p += helper->targetDirLen; 857 if (p == helper->buf || p[-1] != '/') { 858 helper->targetDirLen += 1; 859 *p++ = '/'; 860 } 861 } 862 863 /* Replace the custom part of the path with the appropriate 864 * part of the entry's path. 865 */ 866 char *epath = helper->buf + helper->targetDirLen; 867 memcpy(epath, pEntry->fileName + helper->zipDirLen, 868 pEntry->fileNameLen - helper->zipDirLen); 869 epath += pEntry->fileNameLen - helper->zipDirLen; 870 *epath = '\0'; 871 872 return helper->buf; 873} 874 875/* 876 * Inflate all entries under zipDir to the directory specified by 877 * targetDir, which must exist and be a writable directory. 878 * 879 * The immediate children of zipDir will become the immediate 880 * children of targetDir; e.g., if the archive contains the entries 881 * 882 * a/b/c/one 883 * a/b/c/two 884 * a/b/c/d/three 885 * 886 * and mzExtractRecursive(a, "a/b/c", "/tmp") is called, the resulting 887 * files will be 888 * 889 * /tmp/one 890 * /tmp/two 891 * /tmp/d/three 892 * 893 * Returns true on success, false on failure. 894 */ 895bool mzExtractRecursive(const ZipArchive *pArchive, 896 const char *zipDir, const char *targetDir, 897 int flags, const struct utimbuf *timestamp, 898 void (*callback)(const char *fn, void *), void *cookie) 899{ 900 if (zipDir[0] == '/') { 901 LOGE("mzExtractRecursive(): zipDir must be a relative path.\n"); 902 return false; 903 } 904 if (targetDir[0] != '/') { 905 LOGE("mzExtractRecursive(): targetDir must be an absolute path.\n"); 906 return false; 907 } 908 909 unsigned int zipDirLen; 910 char *zpath; 911 912 zipDirLen = strlen(zipDir); 913 zpath = (char *)malloc(zipDirLen + 2); 914 if (zpath == NULL) { 915 LOGE("Can't allocate %d bytes for zip path\n", zipDirLen + 2); 916 return false; 917 } 918 /* If zipDir is empty, we'll extract the entire zip file. 919 * Otherwise, canonicalize the path. 
920 */ 921 if (zipDirLen > 0) { 922 /* Make sure there's (hopefully, exactly one) slash at the 923 * end of the path. This way we don't need to worry about 924 * accidentally extracting "one/twothree" when a path like 925 * "one/two" is specified. 926 */ 927 memcpy(zpath, zipDir, zipDirLen); 928 if (zpath[zipDirLen-1] != '/') { 929 zpath[zipDirLen++] = '/'; 930 } 931 } 932 zpath[zipDirLen] = '\0'; 933 934 /* Set up the helper structure that we'll use to assemble paths. 935 */ 936 MzPathHelper helper; 937 helper.targetDir = targetDir; 938 helper.targetDirLen = strlen(helper.targetDir); 939 helper.zipDir = zpath; 940 helper.zipDirLen = strlen(helper.zipDir); 941 helper.buf = NULL; 942 helper.bufLen = 0; 943 944 /* Walk through the entries and extract anything whose path begins 945 * with zpath. 946//TODO: since the entries are sorted, binary search for the first match 947// and stop after the first non-match. 948 */ 949 unsigned int i; 950 bool seenMatch = false; 951 int ok = true; 952 for (i = 0; i < pArchive->numEntries; i++) { 953 ZipEntry *pEntry = pArchive->pEntries + i; 954 if (pEntry->fileNameLen < zipDirLen) { 955//TODO: look out for a single empty directory entry that matches zpath, but 956// missing the trailing slash. Most zip files seem to include 957// the trailing slash, but I think it's legal to leave it off. 958// e.g., zpath "a/b/", entry "a/b", with no children of the entry. 959 /* No chance of matching. 960 */ 961#if SORT_ENTRIES 962 if (seenMatch) { 963 /* Since the entries are sorted, we can give up 964 * on the first mismatch after the first match. 965 */ 966 break; 967 } 968#endif 969 continue; 970 } 971 /* If zpath is empty, this strncmp() will match everything, 972 * which is what we want. 973 */ 974 if (strncmp(pEntry->fileName, zpath, zipDirLen) != 0) { 975#if SORT_ENTRIES 976 if (seenMatch) { 977 /* Since the entries are sorted, we can give up 978 * on the first mismatch after the first match. 
979 */ 980 break; 981 } 982#endif 983 continue; 984 } 985 /* This entry begins with zipDir, so we'll extract it. 986 */ 987 seenMatch = true; 988 989 /* Find the target location of the entry. 990 */ 991 const char *targetFile = targetEntryPath(&helper, pEntry); 992 if (targetFile == NULL) { 993 LOGE("Can't assemble target path for \"%.*s\"\n", 994 pEntry->fileNameLen, pEntry->fileName); 995 ok = false; 996 break; 997 } 998 999 /* With DRY_RUN set, invoke the callback but don't do anything else. 1000 */ 1001 if (flags & MZ_EXTRACT_DRY_RUN) { 1002 if (callback != NULL) callback(targetFile, cookie); 1003 continue; 1004 } 1005 1006 /* Create the file or directory. 1007 */ 1008#define UNZIP_DIRMODE 0755 1009#define UNZIP_FILEMODE 0644 1010 if (pEntry->fileName[pEntry->fileNameLen-1] == '/') { 1011 if (!(flags & MZ_EXTRACT_FILES_ONLY)) { 1012 int ret = dirCreateHierarchy( 1013 targetFile, UNZIP_DIRMODE, timestamp, false); 1014 if (ret != 0) { 1015 LOGE("Can't create containing directory for \"%s\": %s\n", 1016 targetFile, strerror(errno)); 1017 ok = false; 1018 break; 1019 } 1020 LOGD("Extracted dir \"%s\"\n", targetFile); 1021 } 1022 } else { 1023 /* This is not a directory. First, make sure that 1024 * the containing directory exists. 1025 */ 1026 int ret = dirCreateHierarchy( 1027 targetFile, UNZIP_DIRMODE, timestamp, true); 1028 if (ret != 0) { 1029 LOGE("Can't create containing directory for \"%s\": %s\n", 1030 targetFile, strerror(errno)); 1031 ok = false; 1032 break; 1033 } 1034 1035 /* With FILES_ONLY set, we need to ignore metadata entirely, 1036 * so treat symlinks as regular files. 1037 */ 1038 if (!(flags & MZ_EXTRACT_FILES_ONLY) && mzIsZipEntrySymlink(pEntry)) { 1039 /* The entry is a symbolic link. 1040 * The relative target of the symlink is in the 1041 * data section of this entry. 
1042 */ 1043 if (pEntry->uncompLen == 0) { 1044 LOGE("Symlink entry \"%s\" has no target\n", 1045 targetFile); 1046 ok = false; 1047 break; 1048 } 1049 char *linkTarget = malloc(pEntry->uncompLen + 1); 1050 if (linkTarget == NULL) { 1051 ok = false; 1052 break; 1053 } 1054 ok = mzReadZipEntry(pArchive, pEntry, linkTarget, 1055 pEntry->uncompLen); 1056 if (!ok) { 1057 LOGE("Can't read symlink target for \"%s\"\n", 1058 targetFile); 1059 free(linkTarget); 1060 break; 1061 } 1062 linkTarget[pEntry->uncompLen] = '\0'; 1063 1064 /* Make the link. 1065 */ 1066 ret = symlink(linkTarget, targetFile); 1067 if (ret != 0) { 1068 LOGE("Can't symlink \"%s\" to \"%s\": %s\n", 1069 targetFile, linkTarget, strerror(errno)); 1070 free(linkTarget); 1071 ok = false; 1072 break; 1073 } 1074 LOGD("Extracted symlink \"%s\" -> \"%s\"\n", 1075 targetFile, linkTarget); 1076 free(linkTarget); 1077 } else { 1078 /* The entry is a regular file. 1079 * Open the target for writing. 1080 */ 1081 int fd = creat(targetFile, UNZIP_FILEMODE); 1082 if (fd < 0) { 1083 LOGE("Can't create target file \"%s\": %s\n", 1084 targetFile, strerror(errno)); 1085 ok = false; 1086 break; 1087 } 1088 1089 bool ok = mzExtractZipEntryToFile(pArchive, pEntry, fd); 1090 close(fd); 1091 if (!ok) { 1092 LOGE("Error extracting \"%s\"\n", targetFile); 1093 ok = false; 1094 break; 1095 } 1096 1097 if (timestamp != NULL && utime(targetFile, timestamp)) { 1098 LOGE("Error touching \"%s\"\n", targetFile); 1099 ok = false; 1100 break; 1101 } 1102 1103 LOGD("Extracted file \"%s\"\n", targetFile); 1104 } 1105 } 1106 1107 if (callback != NULL) callback(targetFile, cookie); 1108 } 1109 1110 free(helper.buf); 1111 free(zpath); 1112 1113 return ok; 1114} 1115