1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16/*
17 * Access the contents of a .dex file.
18 */
19
20#include "DexFile.h"
21#include "DexProto.h"
22#include "Leb128.h"
23#include "sha1.h"
24#include "ZipArchive.h"
25
26#include <zlib.h>
27
28#include <stdlib.h>
29#include <stddef.h>
30#include <string.h>
31#include <fcntl.h>
32#include <errno.h>
33
34/*
35 * Verifying checksums is good, but it slows things down and causes us to
36 * touch every page.  In the "optimized" world, it doesn't work at all,
37 * because we rewrite the contents.
38 */
39static const bool kVerifyChecksum = false;
40static const bool kVerifySignature = false;
41
42
43/* Compare two '\0'-terminated modified UTF-8 strings, using Unicode
44 * code point values for comparison. This treats different encodings
45 * for the same code point as equivalent, except that only a real '\0'
46 * byte is considered the string terminator. The return value is as
47 * for strcmp(). */
48int dexUtf8Cmp(const char* s1, const char* s2) {
49    for (;;) {
50        if (*s1 == '\0') {
51            if (*s2 == '\0') {
52                return 0;
53            }
54            return -1;
55        } else if (*s2 == '\0') {
56            return 1;
57        }
58
59        int utf1 = dexGetUtf16FromUtf8(&s1);
60        int utf2 = dexGetUtf16FromUtf8(&s2);
61        int diff = utf1 - utf2;
62
63        if (diff != 0) {
64            return diff;
65        }
66    }
67}
68
69/* for dexIsValidMemberNameUtf8(), a bit vector indicating valid low ascii */
70u4 DEX_MEMBER_VALID_LOW_ASCII[4] = {
71    0x00000000, // 00..1f low control characters; nothing valid
72    0x03ff2010, // 20..3f digits and symbols; valid: '0'..'9', '$', '-'
73    0x87fffffe, // 40..5f uppercase etc.; valid: 'A'..'Z', '_'
74    0x07fffffe  // 60..7f lowercase etc.; valid: 'a'..'z'
75};
76
77/* Helper for dexIsValidMemberNameUtf8(); do not call directly. */
78bool dexIsValidMemberNameUtf8_0(const char** pUtf8Ptr) {
79    /*
80     * It's a multibyte encoded character. Decode it and analyze. We
81     * accept anything that isn't (a) an improperly encoded low value,
82     * (b) an improper surrogate pair, (c) an encoded '\0', (d) a high
83     * control character, or (e) a high space, layout, or special
84     * character (U+00a0, U+2000..U+200f, U+2028..U+202f,
85     * U+fff0..U+ffff).
86     */
87
88    u2 utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
89
90    // Perform follow-up tests based on the high 8 bits.
91    switch (utf16 >> 8) {
92        case 0x00: {
93            // It's only valid if it's above the ISO-8859-1 high space (0xa0).
94            return (utf16 > 0x00a0);
95        }
96        case 0xd8:
97        case 0xd9:
98        case 0xda:
99        case 0xdb: {
100            /*
101             * It's a leading surrogate. Check to see that a trailing
102             * surrogate follows.
103             */
104            utf16 = dexGetUtf16FromUtf8(pUtf8Ptr);
105            return (utf16 >= 0xdc00) && (utf16 <= 0xdfff);
106        }
107        case 0xdc:
108        case 0xdd:
109        case 0xde:
110        case 0xdf: {
111            // It's a trailing surrogate, which is not valid at this point.
112            return false;
113        }
114        case 0x20:
115        case 0xff: {
116            // It's in the range that has spaces, controls, and specials.
117            switch (utf16 & 0xfff8) {
118                case 0x2000:
119                case 0x2008:
120                case 0x2028:
121                case 0xfff0:
122                case 0xfff8: {
123                    return false;
124                }
125            }
126            break;
127        }
128    }
129
130    return true;
131}
132
133/* Return whether the given string is a valid field or method name. */
134bool dexIsValidMemberName(const char* s) {
135    bool angleName = false;
136
137    switch (*s) {
138        case '\0': {
139            // The empty string is not a valid name.
140            return false;
141        }
142        case '<': {
143            /*
144             * '<' is allowed only at the start of a name, and if present,
145             * means that the name must end with '>'.
146             */
147            angleName = true;
148            s++;
149            break;
150        }
151    }
152
153    for (;;) {
154        switch (*s) {
155            case '\0': {
156                return !angleName;
157            }
158            case '>': {
159                return angleName && s[1] == '\0';
160            }
161        }
162        if (!dexIsValidMemberNameUtf8(&s)) {
163            return false;
164        }
165    }
166}
167
168/* Return whether the given string is a valid type descriptor. */
169bool dexIsValidTypeDescriptor(const char* s) {
170    int arrayCount = 0;
171
172    while (*s == '[') {
173        arrayCount++;
174        s++;
175    }
176
177    if (arrayCount > 255) {
178        // Arrays may have no more than 255 dimensions.
179        return false;
180    }
181
182    switch (*(s++)) {
183        case 'B':
184        case 'C':
185        case 'D':
186        case 'F':
187        case 'I':
188        case 'J':
189        case 'S':
190        case 'Z': {
191            // These are all single-character descriptors for primitive types.
192            return (*s == '\0');
193        }
194        case 'V': {
195            // You can't have an array of void.
196            return (arrayCount == 0) && (*s == '\0');
197        }
198        case 'L': {
199            // Break out and continue below.
200            break;
201        }
202        default: {
203            // Oddball descriptor character.
204            return false;
205        }
206    }
207
208    // We just consumed the 'L' that introduces a class name.
209
210    bool slashOrFirst = true; // first character or just encountered a slash
211    for (;;) {
212        u1 c = (u1) *s;
213        switch (c) {
214            case '\0': {
215                // Premature end.
216                return false;
217            }
218            case ';': {
219                /*
220                 * Make sure that this is the end of the string and that
221                 * it doesn't end with an empty component (including the
222                 * degenerate case of "L;").
223                 */
224                return (s[1] == '\0') && !slashOrFirst;
225            }
226            case '/': {
227                if (slashOrFirst) {
228                    // Slash at start or two slashes in a row.
229                    return false;
230                }
231                slashOrFirst = true;
232                s++;
233                break;
234            }
235            default: {
236                if (!dexIsValidMemberNameUtf8(&s)) {
237                    return false;
238                }
239                slashOrFirst = false;
240                break;
241            }
242        }
243    }
244}
245
246/* Return whether the given string is a valid reference descriptor. This
247 * is true if dexIsValidTypeDescriptor() returns true and the descriptor
248 * is for a class or array and not a primitive type. */
249bool dexIsReferenceDescriptor(const char* s) {
250    if (!dexIsValidTypeDescriptor(s)) {
251        return false;
252    }
253
254    return (s[0] == 'L') || (s[0] == '[');
255}
256
257/* Return whether the given string is a valid class descriptor. This
258 * is true if dexIsValidTypeDescriptor() returns true and the descriptor
259 * is for a class and not an array or primitive type. */
260bool dexIsClassDescriptor(const char* s) {
261    if (!dexIsValidTypeDescriptor(s)) {
262        return false;
263    }
264
265    return s[0] == 'L';
266}
267
268/* Return whether the given string is a valid field type descriptor. This
269 * is true if dexIsValidTypeDescriptor() returns true and the descriptor
270 * is for anything but "void". */
271bool dexIsFieldDescriptor(const char* s) {
272    if (!dexIsValidTypeDescriptor(s)) {
273        return false;
274    }
275
276    return s[0] != 'V';
277}
278
279/* Return the UTF-8 encoded string with the specified string_id index,
280 * also filling in the UTF-16 size (number of 16-bit code points).*/
281const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
282        u4* utf16Size) {
283    const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
284    const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
285
286    *utf16Size = readUnsignedLeb128(&ptr);
287    return (const char*) ptr;
288}
289
290/*
291 * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
292 * least kSHA1DigestOutputLen bytes.
293 */
294const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
295
296/*
297 * Compute a SHA-1 digest on a range of bytes.
298 */
299static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
300    unsigned char digest[])
301{
302    SHA1_CTX context;
303    SHA1Init(&context);
304    SHA1Update(&context, data, length);
305    SHA1Final(digest, &context);
306}
307
308/*
309 * Format the SHA-1 digest into the buffer, which must be able to hold at
310 * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
311 */
312static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
313{
314    static const char hexDigit[] = "0123456789abcdef";
315    char* cp;
316    int i;
317
318    cp = tmpBuf;
319    for (i = 0; i < kSHA1DigestLen; i++) {
320        *cp++ = hexDigit[digest[i] >> 4];
321        *cp++ = hexDigit[digest[i] & 0x0f];
322    }
323    *cp++ = '\0';
324
325    assert(cp == tmpBuf + kSHA1DigestOutputLen);
326
327    return tmpBuf;
328}
329
330/*
331 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
332 *
333 * This may or may not be compatible with UTF-8 hash functions used inside
334 * the Dalvik VM.
335 *
336 * The basic "multiply by 31 and add" approach does better on class names
337 * than most other things tried (e.g. adler32).
338 */
339static u4 classDescriptorHash(const char* str)
340{
341    u4 hash = 1;
342
343    while (*str != '\0')
344        hash = hash * 31 + *str++;
345
346    return hash;
347}
348
349/*
350 * Add an entry to the class lookup table.  We hash the string and probe
351 * until we find an open slot.
352 */
353static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
354    int stringOff, int classDefOff, int* pNumProbes)
355{
356    const char* classDescriptor =
357        (const char*) (pDexFile->baseAddr + stringOff);
358    const DexClassDef* pClassDef =
359        (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
360    u4 hash = classDescriptorHash(classDescriptor);
361    int mask = pLookup->numEntries-1;
362    int idx = hash & mask;
363
364    /*
365     * Find the first empty slot.  We oversized the table, so this is
366     * guaranteed to finish.
367     */
368    int probes = 0;
369    while (pLookup->table[idx].classDescriptorOffset != 0) {
370        idx = (idx + 1) & mask;
371        probes++;
372    }
373    //if (probes > 1)
374    //    LOGW("classLookupAdd: probes=%d\n", probes);
375
376    pLookup->table[idx].classDescriptorHash = hash;
377    pLookup->table[idx].classDescriptorOffset = stringOff;
378    pLookup->table[idx].classDefOffset = classDefOff;
379    *pNumProbes = probes;
380}
381
382/*
383 * Round up to the next highest power of 2.
384 *
385 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
386 */
387u4 dexRoundUpPower2(u4 val)
388{
389    val--;
390    val |= val >> 1;
391    val |= val >> 2;
392    val |= val >> 4;
393    val |= val >> 8;
394    val |= val >> 16;
395    val++;
396
397    return val;
398}
399
400/*
401 * Create the class lookup hash table.
402 *
403 * Returns newly-allocated storage.
404 */
405DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
406{
407    DexClassLookup* pLookup;
408    int allocSize;
409    int i, numEntries;
410    int numProbes, totalProbes, maxProbes;
411
412    numProbes = totalProbes = maxProbes = 0;
413
414    assert(pDexFile != NULL);
415
416    /*
417     * Using a factor of 3 results in far less probing than a factor of 2,
418     * but almost doubles the flash storage requirements for the bootstrap
419     * DEX files.  The overall impact on class loading performance seems
420     * to be minor.  We could probably get some performance improvement by
421     * using a secondary hash.
422     */
423    numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
424    allocSize = offsetof(DexClassLookup, table)
425                    + numEntries * sizeof(pLookup->table[0]);
426
427    pLookup = (DexClassLookup*) calloc(1, allocSize);
428    if (pLookup == NULL)
429        return NULL;
430    pLookup->size = allocSize;
431    pLookup->numEntries = numEntries;
432
433    for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
434        const DexClassDef* pClassDef;
435        const char* pString;
436
437        pClassDef = dexGetClassDef(pDexFile, i);
438        pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
439
440        classLookupAdd(pDexFile, pLookup,
441            (u1*)pString - pDexFile->baseAddr,
442            (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
443
444        if (numProbes > maxProbes)
445            maxProbes = numProbes;
446        totalProbes += numProbes;
447    }
448
449    LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
450         " total=%d max=%d\n",
451        pDexFile->pHeader->classDefsSize, numEntries,
452        (100 * pDexFile->pHeader->classDefsSize) / numEntries,
453        allocSize, totalProbes, maxProbes);
454
455    return pLookup;
456}
457
458
459/*
460 * Set up the basic raw data pointers of a DexFile. This function isn't
461 * meant for general use.
462 */
463void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
464    DexHeader *pHeader = (DexHeader*) data;
465
466    pDexFile->baseAddr = data;
467    pDexFile->pHeader = pHeader;
468    pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
469    pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
470    pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
471    pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
472    pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
473    pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
474    pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
475}
476
477
478/*
479 * Parse out an index map entry, advancing "*pData" and reducing "*pSize".
480 */
481static bool parseIndexMapEntry(const u1** pData, u4* pSize, bool expanding,
482    u4* pFullCount, u4* pReducedCount, const u2** pMap)
483{
484    const u4* wordPtr = (const u4*) *pData;
485    u4 size = *pSize;
486    u4 mapCount;
487
488    if (expanding) {
489        if (size < 4)
490            return false;
491        mapCount = *pReducedCount = *wordPtr++;
492        *pFullCount = (u4) -1;
493        size -= sizeof(u4);
494    } else {
495        if (size < 8)
496            return false;
497        mapCount = *pFullCount = *wordPtr++;
498        *pReducedCount = *wordPtr++;
499        size -= sizeof(u4) * 2;
500    }
501
502    u4 mapSize = mapCount * sizeof(u2);
503
504    if (size < mapSize)
505        return false;
506    *pMap = (const u2*) wordPtr;
507    size -= mapSize;
508
509    /* advance the pointer */
510    const u1* ptr = (const u1*) wordPtr;
511    ptr += (mapSize + 3) & ~0x3;
512
513    /* update pass-by-reference values */
514    *pData = (const u1*) ptr;
515    *pSize = size;
516
517    return true;
518}
519
520/*
521 * Set up some pointers into the mapped data.
522 *
523 * See analysis/ReduceConstants.c for the data layout description.
524 */
525static bool parseIndexMap(DexFile* pDexFile, const u1* data, u4 size,
526    bool expanding)
527{
528    if (!parseIndexMapEntry(&data, &size, expanding,
529            &pDexFile->indexMap.classFullCount,
530            &pDexFile->indexMap.classReducedCount,
531            &pDexFile->indexMap.classMap))
532    {
533        return false;
534    }
535
536    if (!parseIndexMapEntry(&data, &size, expanding,
537            &pDexFile->indexMap.methodFullCount,
538            &pDexFile->indexMap.methodReducedCount,
539            &pDexFile->indexMap.methodMap))
540    {
541        return false;
542    }
543
544    if (!parseIndexMapEntry(&data, &size, expanding,
545            &pDexFile->indexMap.fieldFullCount,
546            &pDexFile->indexMap.fieldReducedCount,
547            &pDexFile->indexMap.fieldMap))
548    {
549        return false;
550    }
551
552    if (!parseIndexMapEntry(&data, &size, expanding,
553            &pDexFile->indexMap.stringFullCount,
554            &pDexFile->indexMap.stringReducedCount,
555            &pDexFile->indexMap.stringMap))
556    {
557        return false;
558    }
559
560    if (expanding) {
561        /*
562         * The map includes the "reduced" counts; pull the original counts
563         * out of the DexFile so that code has a consistent source.
564         */
565        assert(pDexFile->indexMap.classFullCount == (u4) -1);
566        assert(pDexFile->indexMap.methodFullCount == (u4) -1);
567        assert(pDexFile->indexMap.fieldFullCount == (u4) -1);
568        assert(pDexFile->indexMap.stringFullCount == (u4) -1);
569
570#if 0   // TODO: not available yet -- do later or just skip this
571        pDexFile->indexMap.classFullCount =
572            pDexFile->pHeader->typeIdsSize;
573        pDexFile->indexMap.methodFullCount =
574            pDexFile->pHeader->methodIdsSize;
575        pDexFile->indexMap.fieldFullCount =
576            pDexFile->pHeader->fieldIdsSize;
577        pDexFile->indexMap.stringFullCount =
578            pDexFile->pHeader->stringIdsSize;
579#endif
580    }
581
582    LOGI("Class : %u %u %u\n",
583        pDexFile->indexMap.classFullCount,
584        pDexFile->indexMap.classReducedCount,
585        pDexFile->indexMap.classMap[0]);
586    LOGI("Method: %u %u %u\n",
587        pDexFile->indexMap.methodFullCount,
588        pDexFile->indexMap.methodReducedCount,
589        pDexFile->indexMap.methodMap[0]);
590    LOGI("Field : %u %u %u\n",
591        pDexFile->indexMap.fieldFullCount,
592        pDexFile->indexMap.fieldReducedCount,
593        pDexFile->indexMap.fieldMap[0]);
594    LOGI("String: %u %u %u\n",
595        pDexFile->indexMap.stringFullCount,
596        pDexFile->indexMap.stringReducedCount,
597        pDexFile->indexMap.stringMap[0]);
598
599    return true;
600}
601
602/*
603 * Parse some auxillary data tables.
604 *
605 * v1.0 wrote a zero in the first 32 bits, followed by the DexClassLookup
606 * table.  Subsequent versions switched to the "chunk" format.
607 */
608static bool parseAuxData(const u1* data, DexFile* pDexFile)
609{
610    const u4* pAux = (const u4*) (data + pDexFile->pOptHeader->auxOffset);
611    u4 indexMapType = 0;
612
613    /* v1.0 format? */
614    if (*pAux == 0) {
615        LOGV("+++ found OLD dex format\n");
616        pDexFile->pClassLookup = (const DexClassLookup*) (pAux+1);
617        return true;
618    }
619    LOGV("+++ found NEW dex format\n");
620
621    /* process chunks until we see the end marker */
622    while (*pAux != kDexChunkEnd) {
623        u4 size = *(pAux+1);
624        u1* data = (u1*) (pAux + 2);
625
626        switch (*pAux) {
627        case kDexChunkClassLookup:
628            pDexFile->pClassLookup = (const DexClassLookup*) data;
629            break;
630        case kDexChunkReducingIndexMap:
631            LOGI("+++ found reducing index map, size=%u\n", size);
632            if (!parseIndexMap(pDexFile, data, size, false)) {
633                LOGE("Failed parsing reducing index map\n");
634                return false;
635            }
636            indexMapType = *pAux;
637            break;
638        case kDexChunkExpandingIndexMap:
639            LOGI("+++ found expanding index map, size=%u\n", size);
640            if (!parseIndexMap(pDexFile, data, size, true)) {
641                LOGE("Failed parsing expanding index map\n");
642                return false;
643            }
644            indexMapType = *pAux;
645            break;
646        default:
647            LOGI("Unknown chunk 0x%08x (%c%c%c%c), size=%d in aux data area\n",
648                *pAux,
649                (char) ((*pAux) >> 24), (char) ((*pAux) >> 16),
650                (char) ((*pAux) >> 8),  (char)  (*pAux),
651                size);
652            break;
653        }
654
655        /*
656         * Advance pointer, padding to 64-bit boundary.  The extra "+8" is
657         * for the type/size header.
658         */
659        size = (size + 8 + 7) & ~7;
660        pAux += size / sizeof(u4);
661    }
662
663#if 0   // TODO: propagate expected map type from the VM through the API
664    /*
665     * If we're configured to expect an index map, and we don't find one,
666     * reject this DEX so we'll regenerate it.  Also, if we found an
667     * "expanding" map but we're not configured to use it, we have to fail
668     * because the constants aren't usable without translation.
669     */
670    if (indexMapType != expectedIndexMapType) {
671        LOGW("Incompatible index map configuration: found 0x%04x, need %d\n",
672            indexMapType, DVM_REDUCE_CONSTANTS);
673        return false;
674    }
675#endif
676
677    return true;
678}
679
680/*
681 * Parse an optimized or unoptimized .dex file sitting in memory.  This is
682 * called after the byte-ordering and structure alignment has been fixed up.
683 *
684 * On success, return a newly-allocated DexFile.
685 */
686DexFile* dexFileParse(const u1* data, size_t length, int flags)
687{
688    DexFile* pDexFile = NULL;
689    const DexHeader* pHeader;
690    const u1* magic;
691    int result = -1;
692
693    if (length < sizeof(DexHeader)) {
694        LOGE("too short to be a valid .dex\n");
695        goto bail;      /* bad file format */
696    }
697
698    pDexFile = (DexFile*) malloc(sizeof(DexFile));
699    if (pDexFile == NULL)
700        goto bail;      /* alloc failure */
701    memset(pDexFile, 0, sizeof(DexFile));
702
703    /*
704     * Peel off the optimized header.
705     */
706    if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
707        magic = data;
708        if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
709            LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
710                 magic[4], magic[5], magic[6], magic[7]);
711            goto bail;
712        }
713
714        pDexFile->pOptHeader = (const DexOptHeader*) data;
715        LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
716            pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
717
718        /* locate some auxillary data tables */
719        if (!parseAuxData(data, pDexFile))
720            goto bail;
721
722        /* ignore the opt header and appended data from here on out */
723        data += pDexFile->pOptHeader->dexOffset;
724        length -= pDexFile->pOptHeader->dexOffset;
725        if (pDexFile->pOptHeader->dexLength > length) {
726            LOGE("File truncated? stored len=%d, rem len=%d\n",
727                pDexFile->pOptHeader->dexLength, (int) length);
728            goto bail;
729        }
730        length = pDexFile->pOptHeader->dexLength;
731    }
732
733    dexFileSetupBasicPointers(pDexFile, data);
734    pHeader = pDexFile->pHeader;
735
736    magic = pHeader->magic;
737    if (memcmp(magic, DEX_MAGIC, 4) != 0) {
738        /* not expected */
739        LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
740             magic[0], magic[1], magic[2], magic[3]);
741        goto bail;
742    }
743    if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
744        LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
745             magic[4], magic[5], magic[6], magic[7]);
746        goto bail;
747    }
748
749    /*
750     * Verify the checksum.  This is reasonably quick, but does require
751     * touching every byte in the DEX file.  The checksum changes after
752     * byte-swapping and DEX optimization.
753     */
754    if (flags & kDexParseVerifyChecksum) {
755        u4 adler = dexComputeChecksum(pHeader);
756        if (adler != pHeader->checksum) {
757            LOGE("ERROR: bad checksum (%08x vs %08x)\n",
758                adler, pHeader->checksum);
759            if (!(flags & kDexParseContinueOnError))
760                goto bail;
761        } else {
762            LOGV("+++ adler32 checksum (%08x) verified\n", adler);
763        }
764    }
765
766    /*
767     * Verify the SHA-1 digest.  (Normally we don't want to do this --
768     * the digest is used to uniquely identify a DEX file, and can't be
769     * computed post-optimization.)
770     *
771     * The digest will be invalid after byte swapping and DEX optimization.
772     */
773    if (kVerifySignature) {
774        unsigned char sha1Digest[kSHA1DigestLen];
775        const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
776                            kSHA1DigestLen;
777
778        dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
779        if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
780            char tmpBuf1[kSHA1DigestOutputLen];
781            char tmpBuf2[kSHA1DigestOutputLen];
782            LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
783                dexSHA1DigestToStr(sha1Digest, tmpBuf1),
784                dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
785            if (!(flags & kDexParseContinueOnError))
786                goto bail;
787        } else {
788            LOGV("+++ sha1 digest verified\n");
789        }
790    }
791
792    if (pHeader->fileSize != length) {
793        LOGE("ERROR: stored file size (%d) != expected (%d)\n",
794            (int) pHeader->fileSize, (int) length);
795        if (!(flags & kDexParseContinueOnError))
796            goto bail;
797    }
798
799    if (pHeader->classDefsSize == 0) {
800        LOGE("ERROR: DEX file has no classes in it, failing\n");
801        goto bail;
802    }
803
804    /*
805     * Success!
806     */
807    result = 0;
808
809bail:
810    if (result != 0 && pDexFile != NULL) {
811        dexFileFree(pDexFile);
812        pDexFile = NULL;
813    }
814    return pDexFile;
815}
816
817/*
818 * Free up the DexFile and any associated data structures.
819 *
820 * Note we may be called with a partially-initialized DexFile.
821 */
822void dexFileFree(DexFile* pDexFile)
823{
824    if (pDexFile == NULL)
825        return;
826
827    free(pDexFile);
828}
829
830/*
831 * Look up a class definition entry by descriptor.
832 *
833 * "descriptor" should look like "Landroid/debug/Stuff;".
834 */
835const DexClassDef* dexFindClass(const DexFile* pDexFile,
836    const char* descriptor)
837{
838    const DexClassLookup* pLookup = pDexFile->pClassLookup;
839    u4 hash;
840    int idx, mask;
841
842    hash = classDescriptorHash(descriptor);
843    mask = pLookup->numEntries - 1;
844    idx = hash & mask;
845
846    /*
847     * Search until we find a matching entry or an empty slot.
848     */
849    while (true) {
850        int offset;
851
852        offset = pLookup->table[idx].classDescriptorOffset;
853        if (offset == 0)
854            return NULL;
855
856        if (pLookup->table[idx].classDescriptorHash == hash) {
857            const char* str;
858
859            str = (const char*) (pDexFile->baseAddr + offset);
860            if (strcmp(str, descriptor) == 0) {
861                return (const DexClassDef*)
862                    (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
863            }
864        }
865
866        idx = (idx + 1) & mask;
867    }
868}
869
870
871/*
872 * Compute the DEX file checksum for a memory-mapped DEX file.
873 */
874u4 dexComputeChecksum(const DexHeader* pHeader)
875{
876    const u1* start = (const u1*) pHeader;
877
878    uLong adler = adler32(0L, Z_NULL, 0);
879    const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
880
881    return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
882}
883
884
885/*
886 * ===========================================================================
887 *      Debug info
888 * ===========================================================================
889 */
890
891/*
892 * Decode the arguments in a method signature, which looks something
893 * like "(ID[Ljava/lang/String;)V".
894 *
895 * Returns the type signature letter for the next argument, or ')' if
896 * there are no more args.  Advances "pSig" to point to the character
897 * after the one returned.
898 */
899static char decodeSignature(const char** pSig)
900{
901    const char* sig = *pSig;
902
903    if (*sig == '(')
904        sig++;
905
906    if (*sig == 'L') {
907        /* object ref */
908        while (*++sig != ';')
909            ;
910        *pSig = sig+1;
911        return 'L';
912    }
913    if (*sig == '[') {
914        /* array; advance past array type */
915        while (*++sig == '[')
916            ;
917        if (*sig == 'L') {
918            while (*++sig != ';')
919                ;
920        }
921        *pSig = sig+1;
922        return '[';
923    }
924    if (*sig == '\0')
925        return *sig;        /* don't advance further */
926
927    *pSig = sig+1;
928    return *sig;
929}
930
931/*
932 * returns the length of a type string, given the start of the
933 * type string. Used for the case where the debug info format
934 * references types that are inside a method type signature.
935 */
936static int typeLength (const char *type) {
937    // Assumes any leading '(' has already been gobbled
938    const char *end = type;
939    decodeSignature(&end);
940    return end - type;
941}
942
943/*
944 * Reads a string index as encoded for the debug info format,
945 * returning a string pointer or NULL as appropriate.
946 */
947static const char* readStringIdx(const DexFile* pDexFile,
948        const u1** pStream) {
949    u4 stringIdx = readUnsignedLeb128(pStream);
950
951    // Remember, encoded string indicies have 1 added to them.
952    if (stringIdx == 0) {
953        return NULL;
954    } else {
955        return dexStringById(pDexFile, stringIdx - 1);
956    }
957}
958
959/*
960 * Reads a type index as encoded for the debug info format, returning
961 * a string pointer for its descriptor or NULL as appropriate.
962 */
963static const char* readTypeIdx(const DexFile* pDexFile,
964        const u1** pStream) {
965    u4 typeIdx = readUnsignedLeb128(pStream);
966
967    // Remember, encoded type indicies have 1 added to them.
968    if (typeIdx == 0) {
969        return NULL;
970    } else {
971        return dexStringByTypeIdx(pDexFile, typeIdx - 1);
972    }
973}
974
975/* access_flag value indicating that a method is static */
976#define ACC_STATIC              0x0008
977
978typedef struct LocalInfo {
979    const char *name;
980    const char *descriptor;
981    const char *signature;
982    u2 startAddress;
983    bool live;
984} LocalInfo;
985
986static void emitLocalCbIfLive (void *cnxt, int reg, u4 endAddress,
987        LocalInfo *localInReg, DexDebugNewLocalCb localCb)
988{
989    if (localCb != NULL && localInReg[reg].live) {
990        localCb(cnxt, reg, localInReg[reg].startAddress, endAddress,
991                localInReg[reg].name,
992                localInReg[reg].descriptor,
993                localInReg[reg].signature == NULL
994                ? "" : localInReg[reg].signature );
995    }
996}
997
998// TODO optimize localCb == NULL case
999void dexDecodeDebugInfo(
1000            const DexFile* pDexFile,
1001            const DexCode* pCode,
1002            const char* classDescriptor,
1003            u4 protoIdx,
1004            u4 accessFlags,
1005            DexDebugNewPositionCb posCb, DexDebugNewLocalCb localCb,
1006            void* cnxt)
1007{
1008    const u1 *stream = dexGetDebugInfoStream(pDexFile, pCode);
1009    u4 line;
1010    u4 parametersSize;
1011    u4 address = 0;
1012    LocalInfo localInReg[pCode->registersSize];
1013    u4 insnsSize = pCode->insnsSize;
1014    DexProto proto = { pDexFile, protoIdx };
1015
1016    memset(localInReg, 0, sizeof(LocalInfo) * pCode->registersSize);
1017
1018    if (stream == NULL) {
1019        goto end;
1020    }
1021
1022    line = readUnsignedLeb128(&stream);
1023    parametersSize = readUnsignedLeb128(&stream);
1024
1025    u2 argReg = pCode->registersSize - pCode->insSize;
1026
1027    if ((accessFlags & ACC_STATIC) == 0) {
1028        /*
1029         * The code is an instance method, which means that there is
1030         * an initial this parameter. Also, the proto list should
1031         * contain exactly one fewer argument word than the insSize
1032         * indicates.
1033         */
1034        assert(pCode->insSize == (dexProtoComputeArgsSize(&proto) + 1));
1035        localInReg[argReg].name = "this";
1036        localInReg[argReg].descriptor = classDescriptor;
1037        localInReg[argReg].startAddress = 0;
1038        localInReg[argReg].live = true;
1039        argReg++;
1040    } else {
1041        assert(pCode->insSize == dexProtoComputeArgsSize(&proto));
1042    }
1043
1044    DexParameterIterator iterator;
1045    dexParameterIteratorInit(&iterator, &proto);
1046
1047    while (parametersSize-- != 0) {
1048        const char* descriptor = dexParameterIteratorNextDescriptor(&iterator);
1049        const char *name;
1050        int reg;
1051
1052        if ((argReg >= pCode->registersSize) || (descriptor == NULL)) {
1053            goto invalid_stream;
1054        }
1055
1056        name = readStringIdx(pDexFile, &stream);
1057        reg = argReg;
1058
1059        switch (descriptor[0]) {
1060            case 'D':
1061            case 'J':
1062                argReg += 2;
1063                break;
1064            default:
1065                argReg += 1;
1066                break;
1067        }
1068
1069        if (name != NULL) {
1070            localInReg[reg].name = name;
1071            localInReg[reg].descriptor = descriptor;
1072            localInReg[reg].signature = NULL;
1073            localInReg[reg].startAddress = address;
1074            localInReg[reg].live = true;
1075        }
1076    }
1077
1078    for (;;)  {
1079        u1 opcode = *stream++;
1080        u2 reg;
1081
1082        switch (opcode) {
1083            case DBG_END_SEQUENCE:
1084                goto end;
1085
1086            case DBG_ADVANCE_PC:
1087                address += readUnsignedLeb128(&stream);
1088                break;
1089
1090            case DBG_ADVANCE_LINE:
1091                line += readSignedLeb128(&stream);
1092                break;
1093
1094            case DBG_START_LOCAL:
1095            case DBG_START_LOCAL_EXTENDED:
1096                reg = readUnsignedLeb128(&stream);
1097                if (reg > pCode->registersSize) goto invalid_stream;
1098
1099                // Emit what was previously there, if anything
1100                emitLocalCbIfLive (cnxt, reg, address,
1101                    localInReg, localCb);
1102
1103                localInReg[reg].name = readStringIdx(pDexFile, &stream);
1104                localInReg[reg].descriptor = readTypeIdx(pDexFile, &stream);
1105                if (opcode == DBG_START_LOCAL_EXTENDED) {
1106                    localInReg[reg].signature
1107                        = readStringIdx(pDexFile, &stream);
1108                } else {
1109                    localInReg[reg].signature = NULL;
1110                }
1111                localInReg[reg].startAddress = address;
1112                localInReg[reg].live = true;
1113                break;
1114
1115            case DBG_END_LOCAL:
1116                reg = readUnsignedLeb128(&stream);
1117                if (reg > pCode->registersSize) goto invalid_stream;
1118
1119                emitLocalCbIfLive (cnxt, reg, address, localInReg, localCb);
1120                localInReg[reg].live = false;
1121                break;
1122
1123            case DBG_RESTART_LOCAL:
1124                reg = readUnsignedLeb128(&stream);
1125                if (reg > pCode->registersSize) goto invalid_stream;
1126
1127                if (localInReg[reg].name == NULL
1128                        || localInReg[reg].descriptor == NULL) {
1129                    goto invalid_stream;
1130                }
1131
1132                /*
1133                 * If the register is live, the "restart" is superfluous,
1134                 * and we don't want to mess with the existing start address.
1135                 */
1136                if (!localInReg[reg].live) {
1137                    localInReg[reg].startAddress = address;
1138                    localInReg[reg].live = true;
1139                }
1140                break;
1141
1142            case DBG_SET_PROLOGUE_END:
1143            case DBG_SET_EPILOGUE_BEGIN:
1144            case DBG_SET_FILE:
1145                break;
1146
1147            default: {
1148                int adjopcode = opcode - DBG_FIRST_SPECIAL;
1149
1150                address += adjopcode / DBG_LINE_RANGE;
1151                line += DBG_LINE_BASE + (adjopcode % DBG_LINE_RANGE);
1152
1153                if (posCb != NULL) {
1154                    int done;
1155                    done = posCb(cnxt, address, line);
1156
1157                    if (done) {
1158                        // early exit
1159                        goto end;
1160                    }
1161                }
1162                break;
1163            }
1164        }
1165    }
1166
1167end:
1168    {
1169        int reg;
1170        for (reg = 0; reg < pCode->registersSize; reg++) {
1171            emitLocalCbIfLive (cnxt, reg, insnsSize, localInReg, localCb);
1172        }
1173    }
1174    return;
1175
1176invalid_stream:
1177    IF_LOGE() {
1178        char* methodDescriptor = dexProtoCopyMethodDescriptor(&proto);
1179        LOGE("Invalid debug info stream. class %s; proto %s",
1180                classDescriptor, methodDescriptor);
1181        free(methodDescriptor);
1182    }
1183}
1184