1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16/* 17 * Access the contents of a .dex file. 18 */ 19 20#include "DexFile.h" 21#include "DexProto.h" 22#include "Leb128.h" 23#include "sha1.h" 24#include "ZipArchive.h" 25 26#include <zlib.h> 27 28#include <stdlib.h> 29#include <stddef.h> 30#include <string.h> 31#include <fcntl.h> 32#include <errno.h> 33 34/* 35 * Verifying checksums is good, but it slows things down and causes us to 36 * touch every page. In the "optimized" world, it doesn't work at all, 37 * because we rewrite the contents. 38 */ 39static const bool kVerifyChecksum = false; 40static const bool kVerifySignature = false; 41 42 43/* Compare two '\0'-terminated modified UTF-8 strings, using Unicode 44 * code point values for comparison. This treats different encodings 45 * for the same code point as equivalent, except that only a real '\0' 46 * byte is considered the string terminator. The return value is as 47 * for strcmp(). */ 48int dexUtf8Cmp(const char* s1, const char* s2) { 49 for (;;) { 50 if (*s1 == '\0') { 51 if (*s2 == '\0') { 52 return 0; 53 } 54 return -1; 55 } else if (*s2 == '\0') { 56 return 1; 57 } 58 59 int utf1 = dexGetUtf16FromUtf8(&s1); 60 int utf2 = dexGetUtf16FromUtf8(&s2); 61 int diff = utf1 - utf2; 62 63 if (diff != 0) { 64 return diff; 65 } 66 } 67} 68 69/* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */ 70u4 DEX_MEMBER_VALID_LOW_ASCII[4] = { 71 0x00000000, // 00..1f low control characters; nothing valid 72 0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-' 73 0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_' 74 0x07fffffe // 60..7f lowercase etc.; valid: 'a'..'z' 75}; 76 77/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */ 78bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) { 79 /* 80 * It's a multibyte encoded character. Decode it and analyze. We 81 * accept anything that isn't (a) an improperly encoded low value, 82 * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high 83 * control character, or (e) a high space, layout, or special 84 * character (U+00a0, U+2000..U+200f, U+2028..U+202f, 85 * U+fff0..U+ffff). 86 */ 87 88 u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); 89 90 // Perform follow-up tests based on the high 8 bits. 91 switch (utf16 >> 8) { 92 case 0x00: { 93 // It's only valid if it's above the ISO-8859-1 high space (0xa0). 94 return (utf16 > 0x00a0); 95 } 96 case 0xd8: 97 case 0xd9: 98 case 0xda: 99 case 0xdb: { 100 /* 101 * It's a leading surrogate. Check to see that a trailing 102 * surrogate follows. 103 */ 104 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr); 105 return (utf16 >= 0xdc00) && (utf16 <= 0xdfff); 106 } 107 case 0xdc: 108 case 0xdd: 109 case 0xde: 110 case 0xdf: { 111 // It's a trailing surrogate, which is not valid at this point. 112 return false; 113 } 114 case 0x20: 115 case 0xff: { 116 // It's in the range that has spaces, controls, and specials. 117 switch (utf16 & 0xfff8) { 118 case 0x2000: 119 case 0x2008: 120 case 0x2028: 121 case 0xfff0: 122 case 0xfff8: { 123 return false; 124 } 125 } 126 break; 127 } 128 } 129 130 return true; 131} 132 133/* Return whether the given string is a valid field or method name. */ 134bool dexIsValidMemberName(const char* s) { 135 bool angleName = false; 136 137 switch (*s) { 138 case '\0': { 139 // The empty string is not a valid name. 140 return false; 141 } 142 case '<': { 143 /* 144 * '<' is allowed only at the start of a name, and if present, 145 * means that the name must end with '>'. 146 */ 147 angleName = true; 148 s++; 149 break; 150 } 151 } 152 153 for (;;) { 154 switch (*s) { 155 case '\0': { 156 return !angleName; 157 } 158 case '>': { 159 return angleName && s[1] == '\0'; 160 } 161 } 162 if (!dexIsValidMemberNameUtf8(&s)) { 163 return false; 164 } 165 } 166} 167 168/* Return whether the given string is a valid type descriptor. */ 169bool dexIsValidTypeDescriptor(const char* s) { 170 int arrayCount = 0; 171 172 while (*s == '[') { 173 arrayCount++; 174 s++; 175 } 176 177 if (arrayCount > 255) { 178 // Arrays may have no more than 255 dimensions. 179 return false; 180 } 181 182 switch (*(s++)) { 183 case 'B': 184 case 'C': 185 case 'D': 186 case 'F': 187 case 'I': 188 case 'J': 189 case 'S': 190 case 'Z': { 191 // These are all single-character descriptors for primitive types. 192 return (*s == '\0'); 193 } 194 case 'V': { 195 // You can't have an array of void. 196 return (arrayCount == 0) && (*s == '\0'); 197 } 198 case 'L': { 199 // Break out and continue below. 200 break; 201 } 202 default: { 203 // Oddball descriptor character. 204 return false; 205 } 206 } 207 208 // We just consumed the 'L' that introduces a class name. 209 210 bool slashOrFirst = true; // first character or just encountered a slash 211 for (;;) { 212 u1 c = (u1) *s; 213 switch (c) { 214 case '\0': { 215 // Premature end. 216 return false; 217 } 218 case ';': { 219 /* 220 * Make sure that this is the end of the string and that 221 * it doesn't end with an empty component (including the 222 * degenerate case of "L;"). 223 */ 224 return (s[1] == '\0') && !slashOrFirst; 225 } 226 case '/': { 227 if (slashOrFirst) { 228 // Slash at start or two slashes in a row. 229 return false; 230 } 231 slashOrFirst = true; 232 s++; 233 break; 234 } 235 default: { 236 if (!dexIsValidMemberNameUtf8(&s)) { 237 return false; 238 } 239 slashOrFirst = false; 240 break; 241 } 242 } 243 } 244} 245 246/* Return whether the given string is a valid reference descriptor. This 247 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 248 * is for a class or array and not a primitive type. */ 249bool dexIsReferenceDescriptor(const char* s) { 250 if (!dexIsValidTypeDescriptor(s)) { 251 return false; 252 } 253 254 return (s[0] == 'L') || (s[0] == '['); 255} 256 257/* Return whether the given string is a valid class descriptor. This 258 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 259 * is for a class and not an array or primitive type. */ 260bool dexIsClassDescriptor(const char* s) { 261 if (!dexIsValidTypeDescriptor(s)) { 262 return false; 263 } 264 265 return s[0] == 'L'; 266} 267 268/* Return whether the given string is a valid field type descriptor. This 269 * is true if dexIsValidTypeDescriptor() returns true and the descriptor 270 * is for anything but "void". */ 271bool dexIsFieldDescriptor(const char* s) { 272 if (!dexIsValidTypeDescriptor(s)) { 273 return false; 274 } 275 276 return s[0] != 'V'; 277} 278 279/* Return the UTF-8 encoded string with the specified string_id index, 280 * also filling in the UTF-16 size (number of 16-bit code points).*/ 281const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, 282 u4* utf16Size) { 283 const DexStringId* pStringId = dexGetStringId(pDexFile, idx); 284 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; 285 286 *utf16Size = readUnsignedLeb128(&ptr); 287 return (const char*) ptr; 288} 289 290/* 291 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at 292 * least kSHA1DigestOutputLen bytes. 293 */ 294const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); 295 296/* 297 * Compute a SHA-1 digest on a range of bytes. 298 */ 299static void dexComputeSHA1Digest(const unsigned char* data, size_t length, 300 unsigned char digest[]) 301{ 302 SHA1_CTX context; 303 SHA1Init(&context); 304 SHA1Update(&context, data, length); 305 SHA1Final(digest, &context); 306} 307 308/* 309 * Format the SHA-1 digest into the buffer, which must be able to hold at 310 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, 311 */ 312static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) 313{ 314 static const char hexDigit[] = "0123456789abcdef"; 315 char* cp; 316 int i; 317 318 cp = tmpBuf; 319 for (i = 0; i < kSHA1DigestLen; i++) { 320 *cp++ = hexDigit[digest[i] >> 4]; 321 *cp++ = hexDigit[digest[i] & 0x0f]; 322 } 323 *cp++ = '\0'; 324 325 assert(cp == tmpBuf + kSHA1DigestOutputLen); 326 327 return tmpBuf; 328} 329 330/* 331 * Compute a hash code on a UTF-8 string, for use with internal hash tables. 332 * 333 * This may or may not be compatible with UTF-8 hash functions used inside 334 * the Dalvik VM. 335 * 336 * The basic "multiply by 31 and add" approach does better on class names 337 * than most other things tried (e.g. adler32). 338 */ 339static u4 classDescriptorHash(const char* str) 340{ 341 u4 hash = 1; 342 343 while (*str != '\0') 344 hash = hash * 31 + *str++; 345 346 return hash; 347} 348 349/* 350 * Add an entry to the class lookup table. We hash the string and probe 351 * until we find an open slot. 352 */ 353static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, 354 int stringOff, int classDefOff, int* pNumProbes) 355{ 356 const char* classDescriptor = 357 (const char*) (pDexFile->baseAddr + stringOff); 358 const DexClassDef* pClassDef = 359 (const DexClassDef*) (pDexFile->baseAddr + classDefOff); 360 u4 hash = classDescriptorHash(classDescriptor); 361 int mask = pLookup->numEntries-1; 362 int idx = hash & mask; 363 364 /* 365 * Find the first empty slot. We oversized the table, so this is 366 * guaranteed to finish. 367 */ 368 int probes = 0; 369 while (pLookup->table[idx].classDescriptorOffset != 0) { 370 idx = (idx + 1) & mask; 371 probes++; 372 } 373 //if (probes > 1) 374 // LOGW("classLookupAdd: probes=%d\n", probes); 375 376 pLookup->table[idx].classDescriptorHash = hash; 377 pLookup->table[idx].classDescriptorOffset = stringOff; 378 pLookup->table[idx].classDefOffset = classDefOff; 379 *pNumProbes = probes; 380} 381 382/* 383 * Round up to the next highest power of 2. 384 * 385 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 386 */ 387u4 dexRoundUpPower2(u4 val) 388{ 389 val--; 390 val |= val >> 1; 391 val |= val >> 2; 392 val |= val >> 4; 393 val |= val >> 8; 394 val |= val >> 16; 395 val++; 396 397 return val; 398} 399 400/* 401 * Create the class lookup hash table. 402 * 403 * Returns newly-allocated storage. 404 */ 405DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) 406{ 407 DexClassLookup* pLookup; 408 int allocSize; 409 int i, numEntries; 410 int numProbes, totalProbes, maxProbes; 411 412 numProbes = totalProbes = maxProbes = 0; 413 414 assert(pDexFile != NULL); 415 416 /* 417 * Using a factor of 3 results in far less probing than a factor of 2, 418 * but almost doubles the flash storage requirements for the bootstrap 419 * DEX files. The overall impact on class loading performance seems 420 * to be minor. We could probably get some performance improvement by 421 * using a secondary hash. 422 */ 423 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); 424 allocSize = offsetof(DexClassLookup, table) 425 + numEntries * sizeof(pLookup->table[0]); 426 427 pLookup = (DexClassLookup*) calloc(1, allocSize); 428 if (pLookup == NULL) 429 return NULL; 430 pLookup->size = allocSize; 431 pLookup->numEntries = numEntries; 432 433 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { 434 const DexClassDef* pClassDef; 435 const char* pString; 436 437 pClassDef = dexGetClassDef(pDexFile, i); 438 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); 439 440 classLookupAdd(pDexFile, pLookup, 441 (u1*)pString - pDexFile->baseAddr, 442 (u1*)pClassDef - pDexFile->baseAddr, &numProbes); 443 444 if (numProbes > maxProbes) 445 maxProbes = numProbes; 446 totalProbes += numProbes; 447 } 448 449 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" 450 " total=%d max=%d\n", 451 pDexFile->pHeader->classDefsSize, numEntries, 452 (100 * pDexFile->pHeader->classDefsSize) / numEntries, 453 allocSize, totalProbes, maxProbes); 454 455 return pLookup; 456} 457 458 459/* 460 * Set up the basic raw data pointers of a DexFile. This function isn't 461 * meant for general use. 462 */ 463void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { 464 DexHeader *pHeader = (DexHeader*) data; 465 466 pDexFile->baseAddr = data; 467 pDexFile->pHeader = pHeader; 468 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); 469 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); 470 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); 471 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); 472 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); 473 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); 474 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); 475} 476 477 478/* 479 * Parse out an index map entry, advancing "*pData" and reducing "*pSize". 480 */ 481static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding, 482 u4* pFullCount, u4* pReducedCount, const u2** pMap) 483{ 484 const u4* wordPtr = (const u4*) *pData; 485 u4 size = *pSize; 486 u4 mapCount; 487 488 if (expanding) { 489 if (size < 4) 490 return false; 491 mapCount = *pReducedCount = *wordPtr++; 492 *pFullCount = (u4) -1; 493 size -= sizeof(u4); 494 } else { 495 if (size < 8) 496 return false; 497 mapCount = *pFullCount = *wordPtr++; 498 *pReducedCount = *wordPtr++; 499 size -= sizeof(u4) * 2; 500 } 501 502 u4 mapSize = mapCount * sizeof(u2); 503 504 if (size < mapSize) 505 return false; 506 *pMap = (const u2*) wordPtr; 507 size -= mapSize; 508 509 /* advance the pointer */ 510 const u1* ptr = (const u1*) wordPtr; 511 ptr += (mapSize + 3) & ~0x3; 512 513 /* update pass-by-reference values */ 514 *pData = (const u1*) ptr; 515 *pSize = size; 516 517 return true; 518} 519 520/* 521 * Set up some pointers into the mapped data. 522 * 523 * See analysis/ReduceConstants.c for the data layout description. 524 */ 525static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size, 526 bool expanding) 527{ 528 if (!parseIndexMapEntry(&data, &size, expanding, 529 &pDexFile->indexMap.classFullCount, 530 &pDexFile->indexMap.classReducedCount, 531 &pDexFile->indexMap.classMap)) 532 { 533 return false; 534 } 535 536 if (!parseIndexMapEntry(&data, &size, expanding, 537 &pDexFile->indexMap.methodFullCount, 538 &pDexFile->indexMap.methodReducedCount, 539 &pDexFile->indexMap.methodMap)) 540 { 541 return false; 542 } 543 544 if (!parseIndexMapEntry(&data, &size, expanding, 545 &pDexFile->indexMap.fieldFullCount, 546 &pDexFile->indexMap.fieldReducedCount, 547 &pDexFile->indexMap.fieldMap)) 548 { 549 return false; 550 } 551 552 if (!parseIndexMapEntry(&data, &size, expanding, 553 &pDexFile->indexMap.stringFullCount, 554 &pDexFile->indexMap.stringReducedCount, 555 &pDexFile->indexMap.stringMap)) 556 { 557 return false; 558 } 559 560 if (expanding) { 561 /* 562 * The map includes the "reduced" counts; pull the original counts 563 * out of the DexFile so that code has a consistent source. 564 */ 565 assert(pDexFile->indexMap.classFullCount == (u4) -1); 566 assert(pDexFile->indexMap.methodFullCount == (u4) -1); 567 assert(pDexFile->indexMap.fieldFullCount == (u4) -1); 568 assert(pDexFile->indexMap.stringFullCount == (u4) -1); 569 570#if 0 // TODO: not available yet -- do later or just skip this 571 pDexFile->indexMap.classFullCount = 572 pDexFile->pHeader->typeIdsSize; 573 pDexFile->indexMap.methodFullCount = 574 pDexFile->pHeader->methodIdsSize; 575 pDexFile->indexMap.fieldFullCount = 576 pDexFile->pHeader->fieldIdsSize; 577 pDexFile->indexMap.stringFullCount = 578 pDexFile->pHeader->stringIdsSize; 579#endif 580 } 581 582 LOGI("Class : %u %u %u\n", 583 pDexFile->indexMap.classFullCount, 584 pDexFile->indexMap.classReducedCount, 585 pDexFile->indexMap.classMap[0]); 586 LOGI("Method: %u %u %u\n", 587 pDexFile->indexMap.methodFullCount, 588 pDexFile->indexMap.methodReducedCount, 589 pDexFile->indexMap.methodMap[0]); 590 LOGI("Field : %u %u %u\n", 591 pDexFile->indexMap.fieldFullCount, 592 pDexFile->indexMap.fieldReducedCount, 593 pDexFile->indexMap.fieldMap[0]); 594 LOGI("String: %u %u %u\n", 595 pDexFile->indexMap.stringFullCount, 596 pDexFile->indexMap.stringReducedCount, 597 pDexFile->indexMap.stringMap[0]); 598 599 return true; 600} 601 602/* 603 * Parse some auxillary data tables. 604 * 605 * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup 606 * table. Subsequent versions switched to the "chunk" format. 607 */ 608static bool parseAuxData(const u1* data, DexFile* pDexFile) 609{ 610 const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset); 611 u4 indexMapType = 0; 612 613 /* v1.0 format? */ 614 if (*pAux == 0) { 615 LOGV("+++ found OLD dex format\n"); 616 pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1); 617 return true; 618 } 619 LOGV("+++ found NEW dex format\n"); 620 621 /* process chunks until we see the end marker */ 622 while (*pAux != kDexChunkEnd) { 623 u4 size = *(pAux+1); 624 u1* data = (u1*) (pAux + 2); 625 626 switch (*pAux) { 627 case kDexChunkClassLookup: 628 pDexFile->pClassLookup = (const DexClassLookup*) data; 629 break; 630 case kDexChunkReducingIndexMap: 631 LOGI("+++ found reducing index map, size=%u\n", size); 632 if (!parseIndexMap(pDexFile, data, size, false)) { 633 LOGE("Failed parsing reducing index map\n"); 634 return false; 635 } 636 indexMapType = *pAux; 637 break; 638 case kDexChunkExpandingIndexMap: 639 LOGI("+++ found expanding index map, size=%u\n", size); 640 if (!parseIndexMap(pDexFile, data, size, true)) { 641 LOGE("Failed parsing expanding index map\n"); 642 return false; 643 } 644 indexMapType = *pAux; 645 break; 646 default: 647 LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n", 648 *pAux, 649 (char) ((*pAux) >> 24), (char) ((*pAux) >> 16), 650 (char) ((*pAux) >> 8), (char) (*pAux), 651 size); 652 break; 653 } 654 655 /* 656 * Advance pointer, padding to 64-bit boundary. The extra "+8" is 657 * for the type/size header. 658 */ 659 size = (size + 8 + 7) & ~7; 660 pAux += size / sizeof(u4); 661 } 662 663#if 0 // TODO: propagate expected map type from the VM through the API 664 /* 665 * If we're configured to expect an index map, and we don't find one, 666 * reject this DEX so we'll regenerate it. Also, if we found an 667 * "expanding" map but we're not configured to use it, we have to fail 668 * because the constants aren't usable without translation. 669 */ 670 if (indexMapType != expectedIndexMapType) { 671 LOGW("Incompatible index map configuration: found 0x%04x, need %d\n", 672 indexMapType, DVM_REDUCE_CONSTANTS); 673 return false; 674 } 675#endif 676 677 return true; 678} 679 680/* 681 * Parse an optimized or unoptimized .dex file sitting in memory. This is 682 * called after the byte-ordering and structure alignment has been fixed up. 683 * 684 * On success, return a newly-allocated DexFile. 685 */ 686DexFile* dexFileParse(const u1* data, size_t length, int flags) 687{ 688 DexFile* pDexFile = NULL; 689 const DexHeader* pHeader; 690 const u1* magic; 691 int result = -1; 692 693 if (length < sizeof(DexHeader)) { 694 LOGE("too short to be a valid .dex\n"); 695 goto bail; /* bad file format */ 696 } 697 698 pDexFile = (DexFile*) malloc(sizeof(DexFile)); 699 if (pDexFile == NULL) 700 goto bail; /* alloc failure */ 701 memset(pDexFile, 0, sizeof(DexFile)); 702 703 /* 704 * Peel off the optimized header. 705 */ 706 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { 707 magic = data; 708 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { 709 LOGE("bad opt version (0x%02x %02x %02x %02x)\n", 710 magic[4], magic[5], magic[6], magic[7]); 711 goto bail; 712 } 713 714 pDexFile->pOptHeader = (const DexOptHeader*) data; 715 LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", 716 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); 717 718 /* locate some auxillary data tables */ 719 if (!parseAuxData(data, pDexFile)) 720 goto bail; 721 722 /* ignore the opt header and appended data from here on out */ 723 data += pDexFile->pOptHeader->dexOffset; 724 length -= pDexFile->pOptHeader->dexOffset; 725 if (pDexFile->pOptHeader->dexLength > length) { 726 LOGE("File truncated? stored len=%d, rem len=%d\n", 727 pDexFile->pOptHeader->dexLength, (int) length); 728 goto bail; 729 } 730 length = pDexFile->pOptHeader->dexLength; 731 } 732 733 dexFileSetupBasicPointers(pDexFile, data); 734 pHeader = pDexFile->pHeader; 735 736 magic = pHeader->magic; 737 if (memcmp(magic, DEX_MAGIC, 4) != 0) { 738 /* not expected */ 739 LOGE("bad magic number (0x%02x %02x %02x %02x)\n", 740 magic[0], magic[1], magic[2], magic[3]); 741 goto bail; 742 } 743 if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { 744 LOGE("bad dex version (0x%02x %02x %02x %02x)\n", 745 magic[4], magic[5], magic[6], magic[7]); 746 goto bail; 747 } 748 749 /* 750 * Verify the checksum. This is reasonably quick, but does require 751 * touching every byte in the DEX file. The checksum changes after 752 * byte-swapping and DEX optimization. 753 */ 754 if (flags & kDexParseVerifyChecksum) { 755 u4 adler = dexComputeChecksum(pHeader); 756 if (adler != pHeader->checksum) { 757 LOGE("ERROR: bad checksum (%08x vs %08x)\n", 758 adler, pHeader->checksum); 759 if (!(flags & kDexParseContinueOnError)) 760 goto bail; 761 } else { 762 LOGV("+++ adler32 checksum (%08x) verified\n", adler); 763 } 764 } 765 766 /* 767 * Verify the SHA-1 digest. (Normally we don't want to do this -- 768 * the digest is used to uniquely identify a DEX file, and can't be 769 * computed post-optimization.) 770 * 771 * The digest will be invalid after byte swapping and DEX optimization. 772 */ 773 if (kVerifySignature) { 774 unsigned char sha1Digest[kSHA1DigestLen]; 775 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + 776 kSHA1DigestLen; 777 778 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); 779 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { 780 char tmpBuf1[kSHA1DigestOutputLen]; 781 char tmpBuf2[kSHA1DigestOutputLen]; 782 LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", 783 dexSHA1DigestToStr(sha1Digest, tmpBuf1), 784 dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); 785 if (!(flags & kDexParseContinueOnError)) 786 goto bail; 787 } else { 788 LOGV("+++ sha1 digest verified\n"); 789 } 790 } 791 792 if (pHeader->fileSize != length) { 793 LOGE("ERROR: stored file size (%d) != expected (%d)\n", 794 (int) pHeader->fileSize, (int) length); 795 if (!(flags & kDexParseContinueOnError)) 796 goto bail; 797 } 798 799 if (pHeader->classDefsSize == 0) { 800 LOGE("ERROR: DEX file has no classes in it, failing\n"); 801 goto bail; 802 } 803 804 /* 805 * Success! 806 */ 807 result = 0; 808 809bail: 810 if (result != 0 && pDexFile != NULL) { 811 dexFileFree(pDexFile); 812 pDexFile = NULL; 813 } 814 return pDexFile; 815} 816 817/* 818 * Free up the DexFile and any associated data structures. 819 * 820 * Note we may be called with a partially-initialized DexFile. 821 */ 822void dexFileFree(DexFile* pDexFile) 823{ 824 if (pDexFile == NULL) 825 return; 826 827 free(pDexFile); 828} 829 830/* 831 * Look up a class definition entry by descriptor. 832 * 833 * "descriptor" should look like "Landroid/debug/Stuff;". 834 */ 835const DexClassDef* dexFindClass(const DexFile* pDexFile, 836 const char* descriptor) 837{ 838 const DexClassLookup* pLookup = pDexFile->pClassLookup; 839 u4 hash; 840 int idx, mask; 841 842 hash = classDescriptorHash(descriptor); 843 mask = pLookup->numEntries - 1; 844 idx = hash & mask; 845 846 /* 847 * Search until we find a matching entry or an empty slot. 848 */ 849 while (true) { 850 int offset; 851 852 offset = pLookup->table[idx].classDescriptorOffset; 853 if (offset == 0) 854 return NULL; 855 856 if (pLookup->table[idx].classDescriptorHash == hash) { 857 const char* str; 858 859 str = (const char*) (pDexFile->baseAddr + offset); 860 if (strcmp(str, descriptor) == 0) { 861 return (const DexClassDef*) 862 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); 863 } 864 } 865 866 idx = (idx + 1) & mask; 867 } 868} 869 870 871/* 872 * Compute the DEX file checksum for a memory-mapped DEX file. 873 */ 874u4 dexComputeChecksum(const DexHeader* pHeader) 875{ 876 const u1* start = (const u1*) pHeader; 877 878 uLong adler = adler32(0L, Z_NULL, 0); 879 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); 880 881 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); 882} 883 884 885/* 886 * =========================================================================== 887 * Debug info 888 * =========================================================================== 889 */ 890 891/* 892 * Decode the arguments in a method signature, which looks something 893 * like "(ID[Ljava/lang/String;)V". 894 * 895 * Returns the type signature letter for the next argument, or ')' if 896 * there are no more args. Advances "pSig" to point to the character 897 * after the one returned. 898 */ 899static char decodeSignature(const char** pSig) 900{ 901 const char* sig = *pSig; 902 903 if (*sig == '(') 904 sig++; 905 906 if (*sig == 'L') { 907 /* object ref */ 908 while (*++sig != ';') 909 ; 910 *pSig = sig+1; 911 return 'L'; 912 } 913 if (*sig == '[') { 914 /* array; advance past array type */ 915 while (*++sig == '[') 916 ; 917 if (*sig == 'L') { 918 while (*++sig != ';') 919 ; 920 } 921 *pSig = sig+1; 922 return '['; 923 } 924 if (*sig == '\0') 925 return *sig; /* don't advance further */ 926 927 *pSig = sig+1; 928 return *sig; 929} 930 931/* 932 * returns the length of a type string, given the start of the 933 * type string. Used for the case where the debug info format 934 * references types that are inside a method type signature. 935 */ 936static int typeLength (const char *type) { 937 // Assumes any leading '(' has already been gobbled 938 const char *end = type; 939 decodeSignature(&end); 940 return end - type; 941} 942 943/* 944 * Reads a string index as encoded for the debug info format, 945 * returning a string pointer or NULL as appropriate. 946 */ 947static const char* readStringIdx(const DexFile* pDexFile, 948 const u1** pStream) { 949 u4 stringIdx = readUnsignedLeb128(pStream); 950 951 // Remember, encoded string indicies have 1 added to them. 952 if (stringIdx == 0) { 953 return NULL; 954 } else { 955 return dexStringById(pDexFile, stringIdx - 1); 956 } 957} 958 959/* 960 * Reads a type index as encoded for the debug info format, returning 961 * a string pointer for its descriptor or NULL as appropriate. 962 */ 963static const char* readTypeIdx(const DexFile* pDexFile, 964 const u1** pStream) { 965 u4 typeIdx = readUnsignedLeb128(pStream); 966 967 // Remember, encoded type indicies have 1 added to them. 968 if (typeIdx == 0) { 969 return NULL; 970 } else { 971 return dexStringByTypeIdx(pDexFile, typeIdx - 1); 972 } 973} 974 975/* access_flag value indicating that a method is static */ 976#define ACC_STATIC 0x0008 977 978typedef struct LocalInfo { 979 const char *name; 980 const char *descriptor; 981 const char *signature; 982 u2 startAddress; 983 bool live; 984} LocalInfo; 985 986static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress, 987 LocalInfo *localInReg, DexDebugNewLocalCb localCb) 988{ 989 if (localCb != NULL && localInReg[reg].live) { 990 localCb(cnxt, reg, localInReg[reg].startAddress, endAddress, 991 localInReg[reg].name, 992 localInReg[reg].descriptor, 993 localInReg[reg].signature == NULL 994 ? "" : localInReg[reg].signature ); 995 } 996} 997 998// TODO optimize localCb == NULL case 999void dexDecodeDebugInfo( 1000 const DexFile* pDexFile, 1001 const DexCode* pCode, 1002 const char* classDescriptor, 1003 u4 protoIdx, 1004 u4 accessFlags, 1005 DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb, 1006 void* cnxt) 1007{ 1008 const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode); 1009 u4 line; 1010 u4 parametersSize; 1011 u4 address = 0; 1012 LocalInfo localInReg[pCode->registersSize]; 1013 u4 insnsSize = pCode->insnsSize; 1014 DexProto proto = { pDexFile, protoIdx }; 1015 1016 memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize); 1017 1018 if (stream == NULL) { 1019 goto end; 1020 } 1021 1022 line = readUnsignedLeb128(&stream); 1023 parametersSize = readUnsignedLeb128(&stream); 1024 1025 u2 argReg = pCode->registersSize - pCode->insSize; 1026 1027 if ((accessFlags & ACC_STATIC) == 0) { 1028 /* 1029 * The code is an instance method, which means that there is 1030 * an initial this parameter. Also, the proto list should 1031 * contain exactly one fewer argument word than the insSize 1032 * indicates. 1033 */ 1034 assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1)); 1035 localInReg[argReg].name = "this"; 1036 localInReg[argReg].descriptor = classDescriptor; 1037 localInReg[argReg].startAddress = 0; 1038 localInReg[argReg].live = true; 1039 argReg++; 1040 } else { 1041 assert(pCode->insSize == dexProtoComputeArgsSize(&proto)); 1042 } 1043 1044 DexParameterIterator iterator; 1045 dexParameterIteratorInit(&iterator, &proto); 1046 1047 while (parametersSize-- != 0) { 1048 const char* descriptor = dexParameterIteratorNextDescriptor(&iterator); 1049 const char *name; 1050 int reg; 1051 1052 if ((argReg >= pCode->registersSize) || (descriptor == NULL)) { 1053 goto invalid_stream; 1054 } 1055 1056 name = readStringIdx(pDexFile, &stream); 1057 reg = argReg; 1058 1059 switch (descriptor[0]) { 1060 case 'D': 1061 case 'J': 1062 argReg += 2; 1063 break; 1064 default: 1065 argReg += 1; 1066 break; 1067 } 1068 1069 if (name != NULL) { 1070 localInReg[reg].name = name; 1071 localInReg[reg].descriptor = descriptor; 1072 localInReg[reg].signature = NULL; 1073 localInReg[reg].startAddress = address; 1074 localInReg[reg].live = true; 1075 } 1076 } 1077 1078 for (;;) { 1079 u1 opcode = *stream++; 1080 u2 reg; 1081 1082 switch (opcode) { 1083 case DBG_END_SEQUENCE: 1084 goto end; 1085 1086 case DBG_ADVANCE_PC: 1087 address += readUnsignedLeb128(&stream); 1088 break; 1089 1090 case DBG_ADVANCE_LINE: 1091 line += readSignedLeb128(&stream); 1092 break; 1093 1094 case DBG_START_LOCAL: 1095 case DBG_START_LOCAL_EXTENDED: 1096 reg = readUnsignedLeb128(&stream); 1097 if (reg > pCode->registersSize) goto invalid_stream; 1098 1099 // Emit what was previously there, if anything 1100 emitLocalCbIfLive (cnxt, reg, address, 1101 localInReg, localCb); 1102 1103 localInReg[reg].name = readStringIdx(pDexFile, &stream); 1104 localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream); 1105 if (opcode == DBG_START_LOCAL_EXTENDED) { 1106 localInReg[reg].signature 1107 = readStringIdx(pDexFile, &stream); 1108 } else { 1109 localInReg[reg].signature = NULL; 1110 } 1111 localInReg[reg].startAddress = address; 1112 localInReg[reg].live = true; 1113 break; 1114 1115 case DBG_END_LOCAL: 1116 reg = readUnsignedLeb128(&stream); 1117 if (reg > pCode->registersSize) goto invalid_stream; 1118 1119 emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb); 1120 localInReg[reg].live = false; 1121 break; 1122 1123 case DBG_RESTART_LOCAL: 1124 reg = readUnsignedLeb128(&stream); 1125 if (reg > pCode->registersSize) goto invalid_stream; 1126 1127 if (localInReg[reg].name == NULL 1128 || localInReg[reg].descriptor == NULL) { 1129 goto invalid_stream; 1130 } 1131 1132 /* 1133 * If the register is live, the "restart" is superfluous, 1134 * and we don't want to mess with the existing start address. 1135 */ 1136 if (!localInReg[reg].live) { 1137 localInReg[reg].startAddress = address; 1138 localInReg[reg].live = true; 1139 } 1140 break; 1141 1142 case DBG_SET_PROLOGUE_END: 1143 case DBG_SET_EPILOGUE_BEGIN: 1144 case DBG_SET_FILE: 1145 break; 1146 1147 default: { 1148 int adjopcode = opcode - DBG_FIRST_SPECIAL; 1149 1150 address += adjopcode / DBG_LINE_RANGE; 1151 line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE); 1152 1153 if (posCb != NULL) { 1154 int done; 1155 done = posCb(cnxt, address, line); 1156 1157 if (done) { 1158 // early exit 1159 goto end; 1160 } 1161 } 1162 break; 1163 } 1164 } 1165 } 1166 1167end: 1168 { 1169 int reg; 1170 for (reg = 0; reg < pCode->registersSize; reg++) { 1171 emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb); 1172 } 1173 } 1174 return; 1175 1176invalid_stream: 1177 IF_LOGE() { 1178 char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto); 1179 LOGE("Invalid debug info stream. class %s; proto %s", 1180 classDescriptor, methodDescriptor); 1181 free(methodDescriptor); 1182 } 1183} 1184