DexFile.cpp revision a70a3d8faa8f7332549fa0c9ae2008d428e28606
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Access the contents of a .dex file.
19 */
20
21#include "DexFile.h"
22#include "DexOptData.h"
23#include "DexProto.h"
24#include "DexCatch.h"
25#include "Leb128.h"
26#include "sha1.h"
27#include "ZipArchive.h"
28
29#include <zlib.h>
30
31#include <stdlib.h>
32#include <stddef.h>
33#include <string.h>
34#include <fcntl.h>
35#include <errno.h>
36
37
38/*
39 * Verifying checksums is good, but it slows things down and causes us to
40 * touch every page.  In the "optimized" world, it doesn't work at all,
41 * because we rewrite the contents.
42 */
43static const bool kVerifyChecksum = false;
44static const bool kVerifySignature = false;
45
46/* (documented in header) */
47char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) {
48    const char* string = dexGetPrimitiveTypeDescriptor(type);
49
50    return (string == NULL) ? '\0' : string[0];
51}
52
53/* (documented in header) */
54const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) {
55    switch (type) {
56        case PRIM_VOID:    return "V";
57        case PRIM_BOOLEAN: return "Z";
58        case PRIM_BYTE:    return "B";
59        case PRIM_SHORT:   return "S";
60        case PRIM_CHAR:    return "C";
61        case PRIM_INT:     return "I";
62        case PRIM_LONG:    return "J";
63        case PRIM_FLOAT:   return "F";
64        case PRIM_DOUBLE:  return "D";
65        default:           return NULL;
66    }
67
68    return NULL;
69}
70
71/* (documented in header) */
72const char* dexGetBoxedTypeDescriptor(PrimitiveType type) {
73    switch (type) {
74        case PRIM_VOID:    return NULL;
75        case PRIM_BOOLEAN: return "Ljava/lang/Boolean;";
76        case PRIM_BYTE:    return "Ljava/lang/Byte;";
77        case PRIM_SHORT:   return "Ljava/lang/Short;";
78        case PRIM_CHAR:    return "Ljava/lang/Character;";
79        case PRIM_INT:     return "Ljava/lang/Integer;";
80        case PRIM_LONG:    return "Ljava/lang/Long;";
81        case PRIM_FLOAT:   return "Ljava/lang/Float;";
82        case PRIM_DOUBLE:  return "Ljava/lang/Double;";
83        default:           return NULL;
84    }
85}
86
87/* (documented in header) */
88PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) {
89    switch (descriptorChar) {
90        case 'V': return PRIM_VOID;
91        case 'Z': return PRIM_BOOLEAN;
92        case 'B': return PRIM_BYTE;
93        case 'S': return PRIM_SHORT;
94        case 'C': return PRIM_CHAR;
95        case 'I': return PRIM_INT;
96        case 'J': return PRIM_LONG;
97        case 'F': return PRIM_FLOAT;
98        case 'D': return PRIM_DOUBLE;
99        default:  return PRIM_NOT;
100    }
101}
102
103/* Return the UTF-8 encoded string with the specified string_id index,
104 * also filling in the UTF-16 size (number of 16-bit code points).*/
105const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx,
106        u4* utf16Size) {
107    const DexStringId* pStringId = dexGetStringId(pDexFile, idx);
108    const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff;
109
110    *utf16Size = readUnsignedLeb128(&ptr);
111    return (const char*) ptr;
112}
113
114/*
115 * Format an SHA-1 digest for printing.  tmpBuf must be able to hold at
116 * least kSHA1DigestOutputLen bytes.
117 */
118const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf);
119
120/*
121 * Compute a SHA-1 digest on a range of bytes.
122 */
123static void dexComputeSHA1Digest(const unsigned char* data, size_t length,
124    unsigned char digest[])
125{
126    SHA1_CTX context;
127    SHA1Init(&context);
128    SHA1Update(&context, data, length);
129    SHA1Final(digest, &context);
130}
131
132/*
133 * Format the SHA-1 digest into the buffer, which must be able to hold at
134 * least kSHA1DigestOutputLen bytes.  Returns a pointer to the buffer,
135 */
136static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf)
137{
138    static const char hexDigit[] = "0123456789abcdef";
139    char* cp;
140    int i;
141
142    cp = tmpBuf;
143    for (i = 0; i < kSHA1DigestLen; i++) {
144        *cp++ = hexDigit[digest[i] >> 4];
145        *cp++ = hexDigit[digest[i] & 0x0f];
146    }
147    *cp++ = '\0';
148
149    assert(cp == tmpBuf + kSHA1DigestOutputLen);
150
151    return tmpBuf;
152}
153
154/*
155 * Compute a hash code on a UTF-8 string, for use with internal hash tables.
156 *
157 * This may or may not be compatible with UTF-8 hash functions used inside
158 * the Dalvik VM.
159 *
160 * The basic "multiply by 31 and add" approach does better on class names
161 * than most other things tried (e.g. adler32).
162 */
163static u4 classDescriptorHash(const char* str)
164{
165    u4 hash = 1;
166
167    while (*str != '\0')
168        hash = hash * 31 + *str++;
169
170    return hash;
171}
172
173/*
174 * Add an entry to the class lookup table.  We hash the string and probe
175 * until we find an open slot.
176 */
177static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup,
178    int stringOff, int classDefOff, int* pNumProbes)
179{
180    const char* classDescriptor =
181        (const char*) (pDexFile->baseAddr + stringOff);
182    const DexClassDef* pClassDef =
183        (const DexClassDef*) (pDexFile->baseAddr + classDefOff);
184    u4 hash = classDescriptorHash(classDescriptor);
185    int mask = pLookup->numEntries-1;
186    int idx = hash & mask;
187
188    /*
189     * Find the first empty slot.  We oversized the table, so this is
190     * guaranteed to finish.
191     */
192    int probes = 0;
193    while (pLookup->table[idx].classDescriptorOffset != 0) {
194        idx = (idx + 1) & mask;
195        probes++;
196    }
197    //if (probes > 1)
198    //    LOGW("classLookupAdd: probes=%d\n", probes);
199
200    pLookup->table[idx].classDescriptorHash = hash;
201    pLookup->table[idx].classDescriptorOffset = stringOff;
202    pLookup->table[idx].classDefOffset = classDefOff;
203    *pNumProbes = probes;
204}
205
206/*
207 * Create the class lookup hash table.
208 *
209 * Returns newly-allocated storage.
210 */
211DexClassLookup* dexCreateClassLookup(DexFile* pDexFile)
212{
213    DexClassLookup* pLookup;
214    int allocSize;
215    int i, numEntries;
216    int numProbes, totalProbes, maxProbes;
217
218    numProbes = totalProbes = maxProbes = 0;
219
220    assert(pDexFile != NULL);
221
222    /*
223     * Using a factor of 3 results in far less probing than a factor of 2,
224     * but almost doubles the flash storage requirements for the bootstrap
225     * DEX files.  The overall impact on class loading performance seems
226     * to be minor.  We could probably get some performance improvement by
227     * using a secondary hash.
228     */
229    numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2);
230    allocSize = offsetof(DexClassLookup, table)
231                    + numEntries * sizeof(pLookup->table[0]);
232
233    pLookup = (DexClassLookup*) calloc(1, allocSize);
234    if (pLookup == NULL)
235        return NULL;
236    pLookup->size = allocSize;
237    pLookup->numEntries = numEntries;
238
239    for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) {
240        const DexClassDef* pClassDef;
241        const char* pString;
242
243        pClassDef = dexGetClassDef(pDexFile, i);
244        pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
245
246        classLookupAdd(pDexFile, pLookup,
247            (u1*)pString - pDexFile->baseAddr,
248            (u1*)pClassDef - pDexFile->baseAddr, &numProbes);
249
250        if (numProbes > maxProbes)
251            maxProbes = numProbes;
252        totalProbes += numProbes;
253    }
254
255    LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d"
256         " total=%d max=%d\n",
257        pDexFile->pHeader->classDefsSize, numEntries,
258        (100 * pDexFile->pHeader->classDefsSize) / numEntries,
259        allocSize, totalProbes, maxProbes);
260
261    return pLookup;
262}
263
264
265/*
266 * Set up the basic raw data pointers of a DexFile. This function isn't
267 * meant for general use.
268 */
269void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) {
270    DexHeader *pHeader = (DexHeader*) data;
271
272    pDexFile->baseAddr = data;
273    pDexFile->pHeader = pHeader;
274    pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff);
275    pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff);
276    pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff);
277    pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff);
278    pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff);
279    pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff);
280    pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff);
281}
282
283/*
284 * Parse an optimized or unoptimized .dex file sitting in memory.  This is
285 * called after the byte-ordering and structure alignment has been fixed up.
286 *
287 * On success, return a newly-allocated DexFile.
288 */
289DexFile* dexFileParse(const u1* data, size_t length, int flags)
290{
291    DexFile* pDexFile = NULL;
292    const DexHeader* pHeader;
293    const u1* magic;
294    int result = -1;
295
296    if (length < sizeof(DexHeader)) {
297        LOGE("too short to be a valid .dex\n");
298        goto bail;      /* bad file format */
299    }
300
301    pDexFile = (DexFile*) malloc(sizeof(DexFile));
302    if (pDexFile == NULL)
303        goto bail;      /* alloc failure */
304    memset(pDexFile, 0, sizeof(DexFile));
305
306    /*
307     * Peel off the optimized header.
308     */
309    if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) {
310        magic = data;
311        if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) {
312            LOGE("bad opt version (0x%02x %02x %02x %02x)\n",
313                 magic[4], magic[5], magic[6], magic[7]);
314            goto bail;
315        }
316
317        pDexFile->pOptHeader = (const DexOptHeader*) data;
318        LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n",
319            pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags);
320
321        /* parse the optimized dex file tables */
322        if (!dexParseOptData(data, length, pDexFile))
323            goto bail;
324
325        /* ignore the opt header and appended data from here on out */
326        data += pDexFile->pOptHeader->dexOffset;
327        length -= pDexFile->pOptHeader->dexOffset;
328        if (pDexFile->pOptHeader->dexLength > length) {
329            LOGE("File truncated? stored len=%d, rem len=%d\n",
330                pDexFile->pOptHeader->dexLength, (int) length);
331            goto bail;
332        }
333        length = pDexFile->pOptHeader->dexLength;
334    }
335
336    dexFileSetupBasicPointers(pDexFile, data);
337    pHeader = pDexFile->pHeader;
338
339    magic = pHeader->magic;
340    if (memcmp(magic, DEX_MAGIC, 4) != 0) {
341        /* not expected */
342        LOGE("bad magic number (0x%02x %02x %02x %02x)\n",
343             magic[0], magic[1], magic[2], magic[3]);
344        goto bail;
345    }
346    if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) {
347        LOGE("bad dex version (0x%02x %02x %02x %02x)\n",
348             magic[4], magic[5], magic[6], magic[7]);
349        goto bail;
350    }
351
352    /*
353     * Verify the checksum(s).  This is reasonably quick, but does require
354     * touching every byte in the DEX file.  The base checksum changes after
355     * byte-swapping and DEX optimization.
356     */
357    if (flags & kDexParseVerifyChecksum) {
358        u4 adler = dexComputeChecksum(pHeader);
359        if (adler != pHeader->checksum) {
360            LOGE("ERROR: bad checksum (%08x vs %08x)\n",
361                adler, pHeader->checksum);
362            if (!(flags & kDexParseContinueOnError))
363                goto bail;
364        } else {
365            LOGV("+++ adler32 checksum (%08x) verified\n", adler);
366        }
367
368        const DexOptHeader* pOptHeader = pDexFile->pOptHeader;
369        if (pOptHeader != NULL) {
370            adler = dexComputeOptChecksum(pOptHeader);
371            if (adler != pOptHeader->checksum) {
372                LOGE("ERROR: bad opt checksum (%08x vs %08x)\n",
373                    adler, pOptHeader->checksum);
374                if (!(flags & kDexParseContinueOnError))
375                    goto bail;
376            } else {
377                LOGV("+++ adler32 opt checksum (%08x) verified\n", adler);
378            }
379        }
380    }
381
382    /*
383     * Verify the SHA-1 digest.  (Normally we don't want to do this --
384     * the digest is used to uniquely identify the original DEX file, and
385     * can't be computed for verification after the DEX is byte-swapped
386     * and optimized.)
387     */
388    if (kVerifySignature) {
389        unsigned char sha1Digest[kSHA1DigestLen];
390        const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) +
391                            kSHA1DigestLen;
392
393        dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest);
394        if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) {
395            char tmpBuf1[kSHA1DigestOutputLen];
396            char tmpBuf2[kSHA1DigestOutputLen];
397            LOGE("ERROR: bad SHA1 digest (%s vs %s)\n",
398                dexSHA1DigestToStr(sha1Digest, tmpBuf1),
399                dexSHA1DigestToStr(pHeader->signature, tmpBuf2));
400            if (!(flags & kDexParseContinueOnError))
401                goto bail;
402        } else {
403            LOGV("+++ sha1 digest verified\n");
404        }
405    }
406
407    if (pHeader->fileSize != length) {
408        LOGE("ERROR: stored file size (%d) != expected (%d)\n",
409            (int) pHeader->fileSize, (int) length);
410        if (!(flags & kDexParseContinueOnError))
411            goto bail;
412    }
413
414    if (pHeader->classDefsSize == 0) {
415        LOGE("ERROR: DEX file has no classes in it, failing\n");
416        goto bail;
417    }
418
419    /*
420     * Success!
421     */
422    result = 0;
423
424bail:
425    if (result != 0 && pDexFile != NULL) {
426        dexFileFree(pDexFile);
427        pDexFile = NULL;
428    }
429    return pDexFile;
430}
431
432/*
433 * Free up the DexFile and any associated data structures.
434 *
435 * Note we may be called with a partially-initialized DexFile.
436 */
437void dexFileFree(DexFile* pDexFile)
438{
439    if (pDexFile == NULL)
440        return;
441
442    free(pDexFile);
443}
444
445/*
446 * Look up a class definition entry by descriptor.
447 *
448 * "descriptor" should look like "Landroid/debug/Stuff;".
449 */
450const DexClassDef* dexFindClass(const DexFile* pDexFile,
451    const char* descriptor)
452{
453    const DexClassLookup* pLookup = pDexFile->pClassLookup;
454    u4 hash;
455    int idx, mask;
456
457    hash = classDescriptorHash(descriptor);
458    mask = pLookup->numEntries - 1;
459    idx = hash & mask;
460
461    /*
462     * Search until we find a matching entry or an empty slot.
463     */
464    while (true) {
465        int offset;
466
467        offset = pLookup->table[idx].classDescriptorOffset;
468        if (offset == 0)
469            return NULL;
470
471        if (pLookup->table[idx].classDescriptorHash == hash) {
472            const char* str;
473
474            str = (const char*) (pDexFile->baseAddr + offset);
475            if (strcmp(str, descriptor) == 0) {
476                return (const DexClassDef*)
477                    (pDexFile->baseAddr + pLookup->table[idx].classDefOffset);
478            }
479        }
480
481        idx = (idx + 1) & mask;
482    }
483}
484
485
486/*
487 * Compute the DEX file checksum for a memory-mapped DEX file.
488 */
489u4 dexComputeChecksum(const DexHeader* pHeader)
490{
491    const u1* start = (const u1*) pHeader;
492
493    uLong adler = adler32(0L, Z_NULL, 0);
494    const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum);
495
496    return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum);
497}
498
499/*
500 * Compute the size, in bytes, of a DexCode.
501 */
502size_t dexGetDexCodeSize(const DexCode* pCode)
503{
504    /*
505     * The catch handler data is the last entry.  It has a variable number
506     * of variable-size pieces, so we need to create an iterator.
507     */
508    u4 handlersSize;
509    u4 offset;
510    u4 ui;
511
512    if (pCode->triesSize != 0) {
513        handlersSize = dexGetHandlersSize(pCode);
514        offset = dexGetFirstHandlerOffset(pCode);
515    } else {
516        handlersSize = 0;
517        offset = 0;
518    }
519
520    for (ui = 0; ui < handlersSize; ui++) {
521        DexCatchIterator iterator;
522        dexCatchIteratorInit(&iterator, pCode, offset);
523        offset = dexCatchIteratorGetEndOffset(&iterator, pCode);
524    }
525
526    const u1* handlerData = dexGetCatchHandlerData(pCode);
527
528    //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n",
529    //    pCode, handlerData, offset);
530
531    /* return the size of the catch handler + everything before it */
532    return (handlerData - (u1*) pCode) + offset;
533}
534
535/*
536 * Round up to the next highest power of 2.
537 *
538 * Found on http://graphics.stanford.edu/~seander/bithacks.html.
539 */
540u4 dexRoundUpPower2(u4 val)
541{
542    val--;
543    val |= val >> 1;
544    val |= val >> 2;
545    val |= val >> 4;
546    val |= val >> 8;
547    val |= val >> 16;
548    val++;
549
550    return val;
551}
552