1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16/* 17 * Read-only access to Zip archives, with minimal heap allocation. 18 */ 19#include "ZipArchive.h" 20 21#include <zlib.h> 22 23#include <stdlib.h> 24#include <string.h> 25#include <fcntl.h> 26#include <errno.h> 27 28 29/* 30 * Zip file constants. 31 */ 32#define kEOCDSignature 0x06054b50 33#define kEOCDLen 22 34#define kEOCDNumEntries 8 // offset to #of entries in file 35#define kEOCDFileOffset 16 // offset to central directory 36 37#define kMaxCommentLen 65535 // longest possible in ushort 38#define kMaxEOCDSearch (kMaxCommentLen + kEOCDLen) 39 40#define kLFHSignature 0x04034b50 41#define kLFHLen 30 // excluding variable-len fields 42#define kLFHNameLen 26 // offset to filename length 43#define kLFHExtraLen 28 // offset to extra length 44 45#define kCDESignature 0x02014b50 46#define kCDELen 46 // excluding variable-len fields 47#define kCDEMethod 10 // offset to compression method 48#define kCDEModWhen 12 // offset to modification timestamp 49#define kCDECRC 16 // offset to entry CRC 50#define kCDECompLen 20 // offset to compressed length 51#define kCDEUncompLen 24 // offset to uncompressed length 52#define kCDENameLen 28 // offset to filename length 53#define kCDEExtraLen 30 // offset to extra length 54#define kCDECommentLen 32 // offset to comment length 55#define kCDELocalOffset 42 // offset to local hdr 56 57/* 58 * The values we return for ZipEntry use 0 as an invalid value, so we 59 * want to adjust the hash table index by a fixed amount. Using a large 60 * value helps insure that people don't mix & match arguments, e.g. with 61 * entry indices. 62 */ 63#define kZipEntryAdj 10000 64 65/* 66 * Convert a ZipEntry to a hash table index, verifying that it's in a 67 * valid range. 68 */ 69static int entryToIndex(const ZipArchive* pArchive, const ZipEntry entry) 70{ 71 long ent = ((long) entry) - kZipEntryAdj; 72 if (ent < 0 || ent >= pArchive->mHashTableSize || 73 pArchive->mHashTable[ent].name == NULL) 74 { 75 LOGW("Invalid ZipEntry %p (%ld)\n", entry, ent); 76 return -1; 77 } 78 return ent; 79} 80 81/* 82 * Simple string hash function for non-null-terminated strings. 83 */ 84static unsigned int computeHash(const char* str, int len) 85{ 86 unsigned int hash = 0; 87 88 while (len--) 89 hash = hash * 31 + *str++; 90 91 return hash; 92} 93 94/* 95 * Add a new entry to the hash table. 96 */ 97static void addToHash(ZipArchive* pArchive, const char* str, int strLen, 98 unsigned int hash) 99{ 100 const int hashTableSize = pArchive->mHashTableSize; 101 int ent = hash & (hashTableSize - 1); 102 103 /* 104 * We over-allocated the table, so we're guaranteed to find an empty slot. 105 */ 106 while (pArchive->mHashTable[ent].name != NULL) 107 ent = (ent + 1) & (hashTableSize-1); 108 109 pArchive->mHashTable[ent].name = str; 110 pArchive->mHashTable[ent].nameLen = strLen; 111} 112 113/* 114 * Get 2 little-endian bytes. 115 */ 116static u2 get2LE(unsigned char const* pSrc) 117{ 118 return pSrc[0] | (pSrc[1] << 8); 119} 120 121/* 122 * Get 4 little-endian bytes. 123 */ 124static u4 get4LE(unsigned char const* pSrc) 125{ 126 u4 result; 127 128 result = pSrc[0]; 129 result |= pSrc[1] << 8; 130 result |= pSrc[2] << 16; 131 result |= pSrc[3] << 24; 132 133 return result; 134} 135 136/* 137 * Parse the Zip archive, verifying its contents and initializing internal 138 * data structures. 139 */ 140static bool parseZipArchive(ZipArchive* pArchive, const MemMapping* pMap) 141{ 142#define CHECK_OFFSET(_off) { \ 143 if ((unsigned int) (_off) >= maxOffset) { \ 144 LOGE("ERROR: bad offset %u (max %d): %s\n", \ 145 (unsigned int) (_off), maxOffset, #_off); \ 146 goto bail; \ 147 } \ 148 } 149 bool result = false; 150 const unsigned char* basePtr = (const unsigned char*)pMap->addr; 151 const unsigned char* ptr; 152 size_t length = pMap->length; 153 unsigned int i, numEntries, cdOffset; 154 unsigned int val; 155 156 /* 157 * The first 4 bytes of the file will either be the local header 158 * signature for the first file (kLFHSignature) or, if the archive doesn't 159 * have any files in it, the end-of-central-directory signature 160 * (kEOCDSignature). 161 */ 162 val = get4LE(basePtr); 163 if (val == kEOCDSignature) { 164 LOGI("Found Zip archive, but it looks empty\n"); 165 goto bail; 166 } else if (val != kLFHSignature) { 167 LOGV("Not a Zip archive (found 0x%08x)\n", val); 168 goto bail; 169 } 170 171 /* 172 * Find the EOCD. We'll find it immediately unless they have a file 173 * comment. 174 */ 175 ptr = basePtr + length - kEOCDLen; 176 177 while (ptr >= basePtr) { 178 if (*ptr == (kEOCDSignature & 0xff) && get4LE(ptr) == kEOCDSignature) 179 break; 180 ptr--; 181 } 182 if (ptr < basePtr) { 183 LOGI("Could not find end-of-central-directory in Zip\n"); 184 goto bail; 185 } 186 187 /* 188 * There are two interesting items in the EOCD block: the number of 189 * entries in the file, and the file offset of the start of the 190 * central directory. 191 * 192 * (There's actually a count of the #of entries in this file, and for 193 * all files which comprise a spanned archive, but for our purposes 194 * we're only interested in the current file. Besides, we expect the 195 * two to be equivalent for our stuff.) 196 */ 197 numEntries = get2LE(ptr + kEOCDNumEntries); 198 cdOffset = get4LE(ptr + kEOCDFileOffset); 199 200 /* valid offsets are [0,EOCD] */ 201 unsigned int maxOffset; 202 maxOffset = (ptr - basePtr) +1; 203 204 LOGV("+++ numEntries=%d cdOffset=%d\n", numEntries, cdOffset); 205 if (numEntries == 0 || cdOffset >= length) { 206 LOGW("Invalid entries=%d offset=%d (len=%zd)\n", 207 numEntries, cdOffset, length); 208 goto bail; 209 } 210 211 /* 212 * Create hash table. We have a minimum 75% load factor, possibly as 213 * low as 50% after we round off to a power of 2. There must be at 214 * least one unused entry to avoid an infinite loop during creation. 215 */ 216 pArchive->mNumEntries = numEntries; 217 pArchive->mHashTableSize = dexRoundUpPower2(1 + (numEntries * 4) / 3); 218 pArchive->mHashTable = (ZipHashEntry*) 219 calloc(pArchive->mHashTableSize, sizeof(ZipHashEntry)); 220 221 /* 222 * Walk through the central directory, adding entries to the hash 223 * table. 224 */ 225 ptr = basePtr + cdOffset; 226 for (i = 0; i < numEntries; i++) { 227 unsigned int fileNameLen, extraLen, commentLen, localHdrOffset; 228 const unsigned char* localHdr; 229 unsigned int hash; 230 231 if (get4LE(ptr) != kCDESignature) { 232 LOGW("Missed a central dir sig (at %d)\n", i); 233 goto bail; 234 } 235 if (ptr + kCDELen > basePtr + length) { 236 LOGW("Ran off the end (at %d)\n", i); 237 goto bail; 238 } 239 240 localHdrOffset = get4LE(ptr + kCDELocalOffset); 241 CHECK_OFFSET(localHdrOffset); 242 fileNameLen = get2LE(ptr + kCDENameLen); 243 extraLen = get2LE(ptr + kCDEExtraLen); 244 commentLen = get2LE(ptr + kCDECommentLen); 245 246 //LOGV("+++ %d: localHdr=%d fnl=%d el=%d cl=%d\n", 247 // i, localHdrOffset, fileNameLen, extraLen, commentLen); 248 //LOGV(" '%.*s'\n", fileNameLen, ptr + kCDELen); 249 250 /* add the CDE filename to the hash table */ 251 hash = computeHash((const char*)ptr + kCDELen, fileNameLen); 252 addToHash(pArchive, (const char*)ptr + kCDELen, fileNameLen, hash); 253 254 localHdr = basePtr + localHdrOffset; 255 if (get4LE(localHdr) != kLFHSignature) { 256 LOGW("Bad offset to local header: %d (at %d)\n", 257 localHdrOffset, i); 258 goto bail; 259 } 260 261 ptr += kCDELen + fileNameLen + extraLen + commentLen; 262 CHECK_OFFSET(ptr - basePtr); 263 } 264 265 result = true; 266 267bail: 268 return result; 269#undef CHECK_OFFSET 270} 271 272/* 273 * Open the specified file read-only. We memory-map the entire thing and 274 * parse the contents. 275 * 276 * This will be called on non-Zip files, especially during VM startup, so 277 * we don't want to be too noisy about certain types of failure. (Do 278 * we want a "quiet" flag?) 279 * 280 * On success, we fill out the contents of "pArchive" and return 0. 281 */ 282int dexZipOpenArchive(const char* fileName, ZipArchive* pArchive) 283{ 284 int fd, err; 285 286 LOGV("Opening archive '%s' %p\n", fileName, pArchive); 287 288 memset(pArchive, 0, sizeof(ZipArchive)); 289 290 fd = open(fileName, O_RDONLY, 0); 291 if (fd < 0) { 292 err = errno ? errno : -1; 293 LOGV("Unable to open '%s': %s\n", fileName, strerror(err)); 294 return err; 295 } 296 297 return dexZipPrepArchive(fd, fileName, pArchive); 298} 299 300/* 301 * Prepare to access a ZipArchive in an open file descriptor. 302 */ 303int dexZipPrepArchive(int fd, const char* debugFileName, ZipArchive* pArchive) 304{ 305 MemMapping map; 306 int err; 307 308 map.addr = NULL; 309 memset(pArchive, 0, sizeof(*pArchive)); 310 311 pArchive->mFd = fd; 312 313 if (sysMapFileInShmemReadOnly(pArchive->mFd, &map) != 0) { 314 err = -1; 315 LOGW("Map of '%s' failed\n", debugFileName); 316 goto bail; 317 } 318 319 if (map.length < kEOCDLen) { 320 err = -1; 321 LOGV("File '%s' too small to be zip (%zd)\n", debugFileName,map.length); 322 goto bail; 323 } 324 325 if (!parseZipArchive(pArchive, &map)) { 326 err = -1; 327 LOGV("Parsing '%s' failed\n", debugFileName); 328 goto bail; 329 } 330 331 /* success */ 332 err = 0; 333 sysCopyMap(&pArchive->mMap, &map); 334 map.addr = NULL; 335 336bail: 337 if (err != 0) 338 dexZipCloseArchive(pArchive); 339 if (map.addr != NULL) 340 sysReleaseShmem(&map); 341 return err; 342} 343 344 345/* 346 * Close a ZipArchive, closing the file and freeing the contents. 347 * 348 * NOTE: the ZipArchive may not have been fully created. 349 */ 350void dexZipCloseArchive(ZipArchive* pArchive) 351{ 352 LOGV("Closing archive %p\n", pArchive); 353 354 if (pArchive->mFd >= 0) 355 close(pArchive->mFd); 356 357 sysReleaseShmem(&pArchive->mMap); 358 359 free(pArchive->mHashTable); 360 361 pArchive->mFd = -1; 362 pArchive->mNumEntries = -1; 363 pArchive->mHashTableSize = -1; 364 pArchive->mHashTable = NULL; 365} 366 367 368/* 369 * Find a matching entry. 370 * 371 * Returns 0 if not found. 372 */ 373ZipEntry dexZipFindEntry(const ZipArchive* pArchive, const char* entryName) 374{ 375 int nameLen = strlen(entryName); 376 unsigned int hash = computeHash(entryName, nameLen); 377 const int hashTableSize = pArchive->mHashTableSize; 378 int ent = hash & (hashTableSize-1); 379 380 while (pArchive->mHashTable[ent].name != NULL) { 381 if (pArchive->mHashTable[ent].nameLen == nameLen && 382 memcmp(pArchive->mHashTable[ent].name, entryName, nameLen) == 0) 383 { 384 /* match */ 385 return (ZipEntry) (ent + kZipEntryAdj); 386 } 387 388 ent = (ent + 1) & (hashTableSize-1); 389 } 390 391 return NULL; 392} 393 394#if 0 395/* 396 * Find the Nth entry. 397 * 398 * This currently involves walking through the sparse hash table, counting 399 * non-empty entries. If we need to speed this up we can either allocate 400 * a parallel lookup table or (perhaps better) provide an iterator interface. 401 */ 402ZipEntry findEntryByIndex(ZipArchive* pArchive, int idx) 403{ 404 if (idx < 0 || idx >= pArchive->mNumEntries) { 405 LOGW("Invalid index %d\n", idx); 406 return NULL; 407 } 408 409 int ent; 410 for (ent = 0; ent < pArchive->mHashTableSize; ent++) { 411 if (pArchive->mHashTable[ent].name != NULL) { 412 if (idx-- == 0) 413 return (ZipEntry) (ent + kZipEntryAdj); 414 } 415 } 416 417 return NULL; 418} 419#endif 420 421/* 422 * Get the useful fields from the zip entry. 423 * 424 * Returns "false" if the offsets to the fields or the contents of the fields 425 * appear to be bogus. 426 */ 427bool dexZipGetEntryInfo(const ZipArchive* pArchive, ZipEntry entry, 428 int* pMethod, long* pUncompLen, long* pCompLen, off_t* pOffset, 429 long* pModWhen, long* pCrc32) 430{ 431 int ent = entryToIndex(pArchive, entry); 432 if (ent < 0) 433 return false; 434 435 /* 436 * Recover the start of the central directory entry from the filename 437 * pointer. 438 */ 439 const unsigned char* basePtr = (const unsigned char*) 440 pArchive->mMap.addr; 441 const unsigned char* ptr = (const unsigned char*) 442 pArchive->mHashTable[ent].name; 443 size_t zipLength = 444 pArchive->mMap.length; 445 446 ptr -= kCDELen; 447 448 int method = get2LE(ptr + kCDEMethod); 449 if (pMethod != NULL) 450 *pMethod = method; 451 452 if (pModWhen != NULL) 453 *pModWhen = get4LE(ptr + kCDEModWhen); 454 if (pCrc32 != NULL) 455 *pCrc32 = get4LE(ptr + kCDECRC); 456 457 /* 458 * We need to make sure that the lengths are not so large that somebody 459 * trying to map the compressed or uncompressed data runs off the end 460 * of the mapped region. 461 */ 462 unsigned long localHdrOffset = get4LE(ptr + kCDELocalOffset); 463 if (localHdrOffset + kLFHLen >= zipLength) { 464 LOGE("ERROR: bad local hdr offset in zip\n"); 465 return false; 466 } 467 const unsigned char* localHdr = basePtr + localHdrOffset; 468 off_t dataOffset = localHdrOffset + kLFHLen 469 + get2LE(localHdr + kLFHNameLen) + get2LE(localHdr + kLFHExtraLen); 470 if ((unsigned long) dataOffset >= zipLength) { 471 LOGE("ERROR: bad data offset in zip\n"); 472 return false; 473 } 474 475 if (pCompLen != NULL) { 476 *pCompLen = get4LE(ptr + kCDECompLen); 477 if (*pCompLen < 0 || (size_t)(dataOffset + *pCompLen) >= zipLength) { 478 LOGE("ERROR: bad compressed length in zip\n"); 479 return false; 480 } 481 } 482 if (pUncompLen != NULL) { 483 *pUncompLen = get4LE(ptr + kCDEUncompLen); 484 if (*pUncompLen < 0) { 485 LOGE("ERROR: negative uncompressed length in zip\n"); 486 return false; 487 } 488 if (method == kCompressStored && 489 (size_t)(dataOffset + *pUncompLen) >= zipLength) 490 { 491 LOGE("ERROR: bad uncompressed length in zip\n"); 492 return false; 493 } 494 } 495 496 if (pOffset != NULL) { 497 *pOffset = dataOffset; 498 } 499 return true; 500} 501 502/* 503 * Uncompress "deflate" data from one buffer to an open file descriptor. 504 */ 505static bool inflateToFile(int fd, const void* inBuf, long uncompLen, 506 long compLen) 507{ 508 bool result = false; 509 const int kWriteBufSize = 32768; 510 unsigned char writeBuf[kWriteBufSize]; 511 z_stream zstream; 512 int zerr; 513 514 /* 515 * Initialize the zlib stream struct. 516 */ 517 memset(&zstream, 0, sizeof(zstream)); 518 zstream.zalloc = Z_NULL; 519 zstream.zfree = Z_NULL; 520 zstream.opaque = Z_NULL; 521 zstream.next_in = (Bytef*)inBuf; 522 zstream.avail_in = compLen; 523 zstream.next_out = (Bytef*) writeBuf; 524 zstream.avail_out = sizeof(writeBuf); 525 zstream.data_type = Z_UNKNOWN; 526 527 /* 528 * Use the undocumented "negative window bits" feature to tell zlib 529 * that there's no zlib header waiting for it. 530 */ 531 zerr = inflateInit2(&zstream, -MAX_WBITS); 532 if (zerr != Z_OK) { 533 if (zerr == Z_VERSION_ERROR) { 534 LOGE("Installed zlib is not compatible with linked version (%s)\n", 535 ZLIB_VERSION); 536 } else { 537 LOGE("Call to inflateInit2 failed (zerr=%d)\n", zerr); 538 } 539 goto bail; 540 } 541 542 /* 543 * Loop while we have more to do. 544 */ 545 do { 546 /* 547 * Expand data. 548 */ 549 zerr = inflate(&zstream, Z_NO_FLUSH); 550 if (zerr != Z_OK && zerr != Z_STREAM_END) { 551 LOGW("zlib inflate: zerr=%d (nIn=%p aIn=%u nOut=%p aOut=%u)\n", 552 zerr, zstream.next_in, zstream.avail_in, 553 zstream.next_out, zstream.avail_out); 554 goto z_bail; 555 } 556 557 /* write when we're full or when we're done */ 558 if (zstream.avail_out == 0 || 559 (zerr == Z_STREAM_END && zstream.avail_out != sizeof(writeBuf))) 560 { 561 long writeSize = zstream.next_out - writeBuf; 562 int cc = write(fd, writeBuf, writeSize); 563 if (cc != (int) writeSize) { 564 if (cc < 0) { 565 LOGW("write failed in inflate: %s\n", strerror(errno)); 566 } else { 567 LOGW("partial write in inflate (%d vs %ld)\n", 568 cc, writeSize); 569 } 570 goto z_bail; 571 } 572 573 zstream.next_out = writeBuf; 574 zstream.avail_out = sizeof(writeBuf); 575 } 576 } while (zerr == Z_OK); 577 578 assert(zerr == Z_STREAM_END); /* other errors should've been caught */ 579 580 /* paranoia */ 581 if ((long) zstream.total_out != uncompLen) { 582 LOGW("Size mismatch on inflated file (%ld vs %ld)\n", 583 zstream.total_out, uncompLen); 584 goto z_bail; 585 } 586 587 result = true; 588 589z_bail: 590 inflateEnd(&zstream); /* free up any allocated structures */ 591 592bail: 593 return result; 594} 595 596/* 597 * Uncompress an entry, in its entirety, to an open file descriptor. 598 * 599 * TODO: this doesn't verify the data's CRC, but probably should (especially 600 * for uncompressed data). 601 */ 602bool dexZipExtractEntryToFile(const ZipArchive* pArchive, 603 const ZipEntry entry, int fd) 604{ 605 bool result = false; 606 int ent = entryToIndex(pArchive, entry); 607 if (ent < 0) 608 return -1; 609 610 const unsigned char* basePtr = (const unsigned char*)pArchive->mMap.addr; 611 int method; 612 long uncompLen, compLen; 613 off_t offset; 614 615 if (!dexZipGetEntryInfo(pArchive, entry, &method, &uncompLen, &compLen, 616 &offset, NULL, NULL)) 617 { 618 goto bail; 619 } 620 621 if (method == kCompressStored) { 622 ssize_t actual; 623 624 actual = write(fd, basePtr + offset, uncompLen); 625 if (actual < 0) { 626 LOGE("Write failed: %s\n", strerror(errno)); 627 goto bail; 628 } else if (actual != uncompLen) { 629 LOGE("Partial write during uncompress (%d of %ld)\n", 630 (int) actual, uncompLen); 631 goto bail; 632 } else { 633 LOGI("+++ successful write\n"); 634 } 635 } else { 636 if (!inflateToFile(fd, basePtr+offset, uncompLen, compLen)) 637 goto bail; 638 } 639 640 result = true; 641 642bail: 643 return result; 644} 645 646