DexData.java revision 12d6d4c0ea192b6a924df0df1e3b14ce1ed5793b
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package com.android.dexdeps;
18
19import java.io.IOException;
20import java.io.RandomAccessFile;
21import java.util.Arrays;
22
23/**
24 * Data extracted from a DEX file.
25 */
26public class DexData {
27    private RandomAccessFile mDexFile;
28    private HeaderItem mHeaderItem;
29    private String[] mStrings;              // strings from string_data_*
30    private TypeIdItem[] mTypeIds;
31    private ProtoIdItem[] mProtoIds;
32    private FieldIdItem[] mFieldIds;
33    private MethodIdItem[] mMethodIds;
34    private ClassDefItem[] mClassDefs;
35
36    private byte tmpBuf[] = new byte[4];
37    private boolean isBigEndian = false;
38
39    /**
40     * Constructs a new DexData for this file.
41     */
42    public DexData(RandomAccessFile raf) {
43        mDexFile = raf;
44    }
45
46    /**
47     * Loads the contents of the DEX file into our data structures.
48     *
49     * @throws IOException if we encounter a problem while reading
50     * @throws DexDataException if the DEX contents look bad
51     */
52    public void load() throws IOException {
53        parseHeaderItem();
54
55        loadStrings();
56        loadTypeIds();
57        loadProtoIds();
58        loadFieldIds();
59        loadMethodIds();
60        loadClassDefs();
61
62        markInternalClasses();
63    }
64
65
66    /**
67     * Parses the interesting bits out of the header.
68     */
69    void parseHeaderItem() throws IOException {
70        mHeaderItem = new HeaderItem();
71
72        seek(0);
73
74        byte[] magic = new byte[8];
75        readBytes(magic);
76        if (!Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC)) {
77            System.err.println("Magic number is wrong -- are you sure " +
78                "this is a DEX file?");
79            throw new DexDataException();
80        }
81
82        /*
83         * Read the endian tag, so we properly swap things as we read
84         * them from here on.
85         */
86        seek(8+4+20+4+4);
87        mHeaderItem.endianTag = readInt();
88        if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) {
89            /* do nothing */
90        } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT){
91            /* file is big-endian (!), reverse future reads */
92            isBigEndian = true;
93        } else {
94            System.err.println("Endian constant has unexpected value " +
95                Integer.toHexString(mHeaderItem.endianTag));
96            throw new DexDataException();
97        }
98
99        seek(8+4+20);  // magic, checksum, signature
100        mHeaderItem.fileSize = readInt();
101        mHeaderItem.headerSize = readInt();
102        /*mHeaderItem.endianTag =*/ readInt();
103        /*mHeaderItem.linkSize =*/ readInt();
104        /*mHeaderItem.linkOff =*/ readInt();
105        /*mHeaderItem.mapOff =*/ readInt();
106        mHeaderItem.stringIdsSize = readInt();
107        mHeaderItem.stringIdsOff = readInt();
108        mHeaderItem.typeIdsSize = readInt();
109        mHeaderItem.typeIdsOff = readInt();
110        mHeaderItem.protoIdsSize = readInt();
111        mHeaderItem.protoIdsOff = readInt();
112        mHeaderItem.fieldIdsSize = readInt();
113        mHeaderItem.fieldIdsOff = readInt();
114        mHeaderItem.methodIdsSize = readInt();
115        mHeaderItem.methodIdsOff = readInt();
116        mHeaderItem.classDefsSize = readInt();
117        mHeaderItem.classDefsOff = readInt();
118        /*mHeaderItem.dataSize =*/ readInt();
119        /*mHeaderItem.dataOff =*/ readInt();
120    }
121
122    /**
123     * Loads the string table out of the DEX.
124     *
125     * First we read all of the string_id_items, then we read all of the
126     * string_data_item.  Doing it this way should allow us to avoid
127     * seeking around in the file.
128     */
129    void loadStrings() throws IOException {
130        int count = mHeaderItem.stringIdsSize;
131        int stringOffsets[] = new int[count];
132
133        //System.out.println("reading " + count + " strings");
134
135        seek(mHeaderItem.stringIdsOff);
136        for (int i = 0; i < count; i++) {
137            stringOffsets[i] = readInt();
138        }
139
140        mStrings = new String[count];
141
142        seek(stringOffsets[0]);
143        for (int i = 0; i < count; i++) {
144            seek(stringOffsets[i]);         // should be a no-op
145            mStrings[i] = readString();
146            //System.out.println("STR: " + i + ": " + mStrings[i]);
147        }
148    }
149
150    /**
151     * Loads the type ID list.
152     */
153    void loadTypeIds() throws IOException {
154        int count = mHeaderItem.typeIdsSize;
155        mTypeIds = new TypeIdItem[count];
156
157        //System.out.println("reading " + count + " typeIds");
158        seek(mHeaderItem.typeIdsOff);
159        for (int i = 0; i < count; i++) {
160            mTypeIds[i] = new TypeIdItem();
161            mTypeIds[i].descriptorIdx = readInt();
162
163            //System.out.println(i + ": " + mTypeIds[i].descriptorIdx +
164            //    " " + mStrings[mTypeIds[i].descriptorIdx]);
165        }
166    }
167
168    /**
169     * Loads the proto ID list.
170     */
171    void loadProtoIds() throws IOException {
172        int count = mHeaderItem.protoIdsSize;
173        mProtoIds = new ProtoIdItem[count];
174
175        //System.out.println("reading " + count + " protoIds");
176        seek(mHeaderItem.protoIdsOff);
177
178        /*
179         * Read the proto ID items.
180         */
181        for (int i = 0; i < count; i++) {
182            mProtoIds[i] = new ProtoIdItem();
183            mProtoIds[i].shortyIdx = readInt();
184            mProtoIds[i].returnTypeIdx = readInt();
185            mProtoIds[i].parametersOff = readInt();
186
187            //System.out.println(i + ": " + mProtoIds[i].shortyIdx +
188            //    " " + mStrings[mProtoIds[i].shortyIdx]);
189        }
190
191        /*
192         * Go back through and read the type lists.
193         */
194        for (int i = 0; i < count; i++) {
195            ProtoIdItem protoId = mProtoIds[i];
196
197            int offset = protoId.parametersOff;
198
199            if (offset == 0) {
200                protoId.types = new int[0];
201                continue;
202            } else {
203                seek(offset);
204                int size = readInt();       // #of entries in list
205                protoId.types = new int[size];
206
207                for (int j = 0; j < size; j++) {
208                    protoId.types[j] = readShort() & 0xffff;
209                }
210            }
211        }
212    }
213
214    /**
215     * Loads the field ID list.
216     */
217    void loadFieldIds() throws IOException {
218        int count = mHeaderItem.fieldIdsSize;
219        mFieldIds = new FieldIdItem[count];
220
221        //System.out.println("reading " + count + " fieldIds");
222        seek(mHeaderItem.fieldIdsOff);
223        for (int i = 0; i < count; i++) {
224            mFieldIds[i] = new FieldIdItem();
225            mFieldIds[i].classIdx = readShort() & 0xffff;
226            mFieldIds[i].typeIdx = readShort() & 0xffff;
227            mFieldIds[i].nameIdx = readInt();
228
229            //System.out.println(i + ": " + mFieldIds[i].nameIdx +
230            //    " " + mStrings[mFieldIds[i].nameIdx]);
231        }
232    }
233
234    /**
235     * Loads the method ID list.
236     */
237    void loadMethodIds() throws IOException {
238        int count = mHeaderItem.methodIdsSize;
239        mMethodIds = new MethodIdItem[count];
240
241        //System.out.println("reading " + count + " methodIds");
242        seek(mHeaderItem.methodIdsOff);
243        for (int i = 0; i < count; i++) {
244            mMethodIds[i] = new MethodIdItem();
245            mMethodIds[i].classIdx = readShort() & 0xffff;
246            mMethodIds[i].protoIdx = readShort() & 0xffff;
247            mMethodIds[i].nameIdx = readInt();
248
249            //System.out.println(i + ": " + mMethodIds[i].nameIdx +
250            //    " " + mStrings[mMethodIds[i].nameIdx]);
251        }
252    }
253
254    /**
255     * Loads the class defs list.
256     */
257    void loadClassDefs() throws IOException {
258        int count = mHeaderItem.classDefsSize;
259        mClassDefs = new ClassDefItem[count];
260
261        //System.out.println("reading " + count + " classDefs");
262        seek(mHeaderItem.classDefsOff);
263        for (int i = 0; i < count; i++) {
264            mClassDefs[i] = new ClassDefItem();
265            mClassDefs[i].classIdx = readInt();
266
267            /* access_flags = */ readInt();
268            /* superclass_idx = */ readInt();
269            /* interfaces_off = */ readInt();
270            /* source_file_idx = */ readInt();
271            /* annotations_off = */ readInt();
272            /* class_data_off = */ readInt();
273            /* static_values_off = */ readInt();
274
275            //System.out.println(i + ": " + mClassDefs[i].classIdx + " " +
276            //    mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]);
277        }
278    }
279
280    /**
281     * Sets the "internal" flag on type IDs which are defined in the
282     * DEX file or within the VM (e.g. primitive classes and arrays).
283     */
284    void markInternalClasses() {
285        for (int i = mClassDefs.length -1; i >= 0; i--) {
286            mTypeIds[mClassDefs[i].classIdx].internal = true;
287        }
288
289        for (int i = 0; i < mTypeIds.length; i++) {
290            String className = mStrings[mTypeIds[i].descriptorIdx];
291
292            if (className.length() == 1) {
293                // primitive class
294                mTypeIds[i].internal = true;
295            } else if (className.charAt(0) == '[') {
296                mTypeIds[i].internal = true;
297            }
298
299            //System.out.println(i + " " +
300            //    (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " +
301            //    mStrings[mTypeIds[i].descriptorIdx]);
302        }
303    }
304
305
306    /*
307     * =======================================================================
308     *      Queries
309     * =======================================================================
310     */
311
312    /**
313     * Converts a single-character primitive type into its human-readable
314     * equivalent.
315     */
316    private String primitiveTypeLabel(char typeChar) {
317        /* primitive type; substitute human-readable name in */
318        switch (typeChar) {
319            case 'B':   return "byte";
320            case 'C':   return "char";
321            case 'D':   return "double";
322            case 'F':   return "float";
323            case 'I':   return "int";
324            case 'J':   return "long";
325            case 'S':   return "short";
326            case 'V':   return "void";
327            case 'Z':   return "boolean";
328            default:
329                /* huh? */
330                System.err.println("Unexpected class char " + typeChar);
331                assert false;
332                return "UNKNOWN";
333        }
334    }
335
336    /**
337     * Converts a descriptor to dotted form.  For example,
338     * "Ljava/lang/String;" becomes "java.lang.String", and "[I" becomes
339     * "int[].
340     */
341    private String descriptorToDot(String descr) {
342        int targetLen = descr.length();
343        int offset = 0;
344        int arrayDepth = 0;
345
346        /* strip leading [s; will be added to end */
347        while (targetLen > 1 && descr.charAt(offset) == '[') {
348            offset++;
349            targetLen--;
350        }
351        arrayDepth = offset;
352
353        if (targetLen == 1) {
354            descr = primitiveTypeLabel(descr.charAt(offset));
355            offset = 0;
356            targetLen = descr.length();
357        } else {
358            /* account for leading 'L' and trailing ';' */
359            if (targetLen >= 2 && descr.charAt(offset) == 'L' &&
360                descr.charAt(offset+targetLen-1) == ';')
361            {
362                targetLen -= 2;     /* two fewer chars to copy */
363                offset++;           /* skip the 'L' */
364            }
365        }
366
367        char[] buf = new char[targetLen + arrayDepth * 2];
368
369        /* copy class name over */
370        int i;
371        for (i = 0; i < targetLen; i++) {
372            char ch = descr.charAt(offset + i);
373            buf[i] = (ch == '/') ? '.' : ch;
374        }
375
376        /* add the appopriate number of brackets for arrays */
377        while (arrayDepth-- > 0) {
378            buf[i++] = '[';
379            buf[i++] = ']';
380        }
381        assert i == buf.length;
382
383        return new String(buf);
384    }
385
386    /**
387     * Returns the dot-form class name, given an index into the type_ids
388     * table.
389     */
390    private String classNameFromTypeIndex(int idx) {
391        String descriptor = mStrings[mTypeIds[idx].descriptorIdx];
392        return descriptorToDot(descriptor);
393    }
394
395    /**
396     * Returns the method prototype descriptor, given an index into the
397     * proto_ids table.
398     */
399    private String protoStringFromProtoIndex(int idx) {
400        StringBuilder builder = new StringBuilder();
401        ProtoIdItem protoId = mProtoIds[idx];
402
403        builder.append("(");
404        for (int i = 0; i < protoId.types.length; i++) {
405            String elem = mStrings[mTypeIds[protoId.types[i]].descriptorIdx];
406            builder.append(elem);
407        }
408
409        builder.append(")");
410        String ret = mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx];
411        builder.append(ret);
412
413        return builder.toString();
414    }
415
416    /**
417     * Returns an array with all of the field references that don't
418     * correspond to classes in the DEX file.
419     */
420    public FieldRef[] getExternalFieldReferences() {
421        // get a count
422        int count = 0;
423        for (int i = 0; i < mFieldIds.length; i++) {
424            if (!mTypeIds[mFieldIds[i].classIdx].internal)
425                count++;
426        }
427
428        //System.out.println("count is " + count + " of " + mFieldIds.length);
429
430        FieldRef[] fieldRefs = new FieldRef[count];
431        count = 0;
432        for (int i = 0; i < mFieldIds.length; i++) {
433            if (!mTypeIds[mFieldIds[i].classIdx].internal) {
434                FieldIdItem fieldId = mFieldIds[i];
435                fieldRefs[count++] =
436                    new FieldRef(classNameFromTypeIndex(fieldId.classIdx),
437                                 classNameFromTypeIndex(fieldId.typeIdx),
438                                 mStrings[fieldId.nameIdx]);
439            }
440        }
441
442        assert count == fieldRefs.length;
443
444        return fieldRefs;
445    }
446
447    /**
448     * Returns an array with all of the method references that don't
449     * correspond to classes in the DEX file.
450     */
451    public MethodRef[] getExternalMethodReferences() {
452        // get a count
453        int count = 0;
454        for (int i = 0; i < mMethodIds.length; i++) {
455            if (!mTypeIds[mMethodIds[i].classIdx].internal)
456                count++;
457        }
458
459        //System.out.println("count is " + count + " of " + mMethodIds.length);
460
461        MethodRef[] methodRefs = new MethodRef[count];
462        count = 0;
463        for (int i = 0; i < mMethodIds.length; i++) {
464            if (!mTypeIds[mMethodIds[i].classIdx].internal) {
465                MethodIdItem methodId = mMethodIds[i];
466                methodRefs[count++] =
467                    new MethodRef(classNameFromTypeIndex(methodId.classIdx),
468                                 protoStringFromProtoIndex(methodId.protoIdx),
469                                 mStrings[methodId.nameIdx]);
470            }
471        }
472
473        assert count == methodRefs.length;
474
475        return methodRefs;
476    }
477
478    /*
479     * =======================================================================
480     *      Basic I/O functions
481     * =======================================================================
482     */
483
484    /**
485     * Seeks the DEX file to the specified absolute position.
486     */
487    void seek(int position) throws IOException {
488        mDexFile.seek(position);
489    }
490
491    /**
492     * Fills the buffer by reading bytes from the DEX file.
493     */
494    void readBytes(byte[] buffer) throws IOException {
495        mDexFile.readFully(buffer);
496    }
497
498    /**
499     * Reads a single signed byte value.
500     */
501    byte readByte() throws IOException {
502        mDexFile.readFully(tmpBuf, 0, 1);
503        return tmpBuf[0];
504    }
505
506    /**
507     * Reads a signed 16-bit integer, byte-swapping if necessary.
508     */
509    short readShort() throws IOException {
510        mDexFile.readFully(tmpBuf, 0, 2);
511        if (isBigEndian) {
512            return (short) ((tmpBuf[1] & 0xff) | ((tmpBuf[0] & 0xff) << 8));
513        } else {
514            return (short) ((tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8));
515        }
516    }
517
518    /**
519     * Reads a signed 32-bit integer, byte-swapping if necessary.
520     */
521    int readInt() throws IOException {
522        mDexFile.readFully(tmpBuf, 0, 4);
523
524        if (isBigEndian) {
525            return (tmpBuf[3] & 0xff) | ((tmpBuf[2] & 0xff) << 8) |
526                   ((tmpBuf[1] & 0xff) << 16) | ((tmpBuf[0] & 0xff) << 24);
527        } else {
528            return (tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8) |
529                   ((tmpBuf[2] & 0xff) << 16) | ((tmpBuf[3] & 0xff) << 24);
530        }
531    }
532
533    /**
534     * Reads a variable-length unsigned LEB128 value.  Does not attempt to
535     * verify that the value is valid.
536     *
537     * @throws EOFException if we run off the end of the file
538     */
539    int readUnsignedLeb128() throws IOException {
540        int result = 0;
541        byte val;
542
543        do {
544            val = readByte();
545            result = (result << 7) | (val & 0x7f);
546        } while (val < 0);
547
548        return result;
549    }
550
551    /**
552     * Reads a UTF-8 string.
553     *
554     * We don't know how long the UTF-8 string is, so we have to read one
555     * byte at a time.  We could make an educated guess based on the
556     * utf16_size and seek back if we get it wrong, but seeking backward
557     * may cause the underlying implementation to reload I/O buffers.
558     */
559    String readString() throws IOException {
560        int utf16len = readUnsignedLeb128();
561        byte inBuf[] = new byte[utf16len * 3];      // worst case
562        int idx;
563
564        for (idx = 0; idx < inBuf.length; idx++) {
565            byte val = readByte();
566            if (val == 0)
567                break;
568            inBuf[idx] = val;
569        }
570
571        return new String(inBuf, 0, idx, "UTF-8");
572    }
573
574
575    /*
576     * =======================================================================
577     *      Internal "structure" declarations
578     * =======================================================================
579     */
580
581    /**
582     * Holds the contents of a header_item.
583     */
584    static class HeaderItem {
585        public int fileSize;
586        public int headerSize;
587        public int endianTag;
588        public int stringIdsSize, stringIdsOff;
589        public int typeIdsSize, typeIdsOff;
590        public int protoIdsSize, protoIdsOff;
591        public int fieldIdsSize, fieldIdsOff;
592        public int methodIdsSize, methodIdsOff;
593        public int classDefsSize, classDefsOff;
594
595        /* expected magic values */
596        public static final byte[] DEX_FILE_MAGIC = {
597            0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00 };
598        public static final int ENDIAN_CONSTANT = 0x12345678;
599        public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412;
600    }
601
602    /**
603     * Holds the contents of a type_id_item.
604     *
605     * This is chiefly a list of indices into the string table.  We need
606     * some additional bits of data, such as whether or not the type ID
607     * represents a class defined in this DEX, so we use an object for
608     * each instead of a simple integer.  (Could use a parallel array, but
609     * since this is a desktop app it's not essential.)
610     */
611    static class TypeIdItem {
612        public int descriptorIdx;       // index into string_ids
613
614        public boolean internal;        // defined within this DEX file?
615    }
616
617    /**
618     * Holds the contents of a proto_id_item.
619     */
620    static class ProtoIdItem {
621        public int shortyIdx;           // index into string_ids
622        public int returnTypeIdx;       // index into type_ids
623        public int parametersOff;       // file offset to a type_list
624
625        public int types[];             // contents of type list
626    }
627
628    /**
629     * Holds the contents of a field_id_item.
630     */
631    static class FieldIdItem {
632        public int classIdx;            // index into type_ids (defining class)
633        public int typeIdx;             // index into type_ids (field type)
634        public int nameIdx;             // index into string_ids
635    }
636
637    /**
638     * Holds the contents of a method_id_item.
639     */
640    static class MethodIdItem {
641        public int classIdx;            // index into type_ids
642        public int protoIdx;            // index into proto_ids
643        public int nameIdx;             // index into string_ids
644    }
645
646    /**
647     * Holds the contents of a class_def_item.
648     *
649     * We don't really need a class for this, but there's some stuff in
650     * the class_def_item that we might want later.
651     */
652    static class ClassDefItem {
653        public int classIdx;            // index into type_ids
654    }
655}
656
657