DexData.java revision 12d6d4c0ea192b6a924df0df1e3b14ce1ed5793b
1/* 2 * Copyright (C) 2009 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17package com.android.dexdeps; 18 19import java.io.IOException; 20import java.io.RandomAccessFile; 21import java.util.Arrays; 22 23/** 24 * Data extracted from a DEX file. 25 */ 26public class DexData { 27 private RandomAccessFile mDexFile; 28 private HeaderItem mHeaderItem; 29 private String[] mStrings; // strings from string_data_* 30 private TypeIdItem[] mTypeIds; 31 private ProtoIdItem[] mProtoIds; 32 private FieldIdItem[] mFieldIds; 33 private MethodIdItem[] mMethodIds; 34 private ClassDefItem[] mClassDefs; 35 36 private byte tmpBuf[] = new byte[4]; 37 private boolean isBigEndian = false; 38 39 /** 40 * Constructs a new DexData for this file. 41 */ 42 public DexData(RandomAccessFile raf) { 43 mDexFile = raf; 44 } 45 46 /** 47 * Loads the contents of the DEX file into our data structures. 48 * 49 * @throws IOException if we encounter a problem while reading 50 * @throws DexDataException if the DEX contents look bad 51 */ 52 public void load() throws IOException { 53 parseHeaderItem(); 54 55 loadStrings(); 56 loadTypeIds(); 57 loadProtoIds(); 58 loadFieldIds(); 59 loadMethodIds(); 60 loadClassDefs(); 61 62 markInternalClasses(); 63 } 64 65 66 /** 67 * Parses the interesting bits out of the header. 68 */ 69 void parseHeaderItem() throws IOException { 70 mHeaderItem = new HeaderItem(); 71 72 seek(0); 73 74 byte[] magic = new byte[8]; 75 readBytes(magic); 76 if (!Arrays.equals(magic, HeaderItem.DEX_FILE_MAGIC)) { 77 System.err.println("Magic number is wrong -- are you sure " + 78 "this is a DEX file?"); 79 throw new DexDataException(); 80 } 81 82 /* 83 * Read the endian tag, so we properly swap things as we read 84 * them from here on. 85 */ 86 seek(8+4+20+4+4); 87 mHeaderItem.endianTag = readInt(); 88 if (mHeaderItem.endianTag == HeaderItem.ENDIAN_CONSTANT) { 89 /* do nothing */ 90 } else if (mHeaderItem.endianTag == HeaderItem.REVERSE_ENDIAN_CONSTANT){ 91 /* file is big-endian (!), reverse future reads */ 92 isBigEndian = true; 93 } else { 94 System.err.println("Endian constant has unexpected value " + 95 Integer.toHexString(mHeaderItem.endianTag)); 96 throw new DexDataException(); 97 } 98 99 seek(8+4+20); // magic, checksum, signature 100 mHeaderItem.fileSize = readInt(); 101 mHeaderItem.headerSize = readInt(); 102 /*mHeaderItem.endianTag =*/ readInt(); 103 /*mHeaderItem.linkSize =*/ readInt(); 104 /*mHeaderItem.linkOff =*/ readInt(); 105 /*mHeaderItem.mapOff =*/ readInt(); 106 mHeaderItem.stringIdsSize = readInt(); 107 mHeaderItem.stringIdsOff = readInt(); 108 mHeaderItem.typeIdsSize = readInt(); 109 mHeaderItem.typeIdsOff = readInt(); 110 mHeaderItem.protoIdsSize = readInt(); 111 mHeaderItem.protoIdsOff = readInt(); 112 mHeaderItem.fieldIdsSize = readInt(); 113 mHeaderItem.fieldIdsOff = readInt(); 114 mHeaderItem.methodIdsSize = readInt(); 115 mHeaderItem.methodIdsOff = readInt(); 116 mHeaderItem.classDefsSize = readInt(); 117 mHeaderItem.classDefsOff = readInt(); 118 /*mHeaderItem.dataSize =*/ readInt(); 119 /*mHeaderItem.dataOff =*/ readInt(); 120 } 121 122 /** 123 * Loads the string table out of the DEX. 124 * 125 * First we read all of the string_id_items, then we read all of the 126 * string_data_item. Doing it this way should allow us to avoid 127 * seeking around in the file. 128 */ 129 void loadStrings() throws IOException { 130 int count = mHeaderItem.stringIdsSize; 131 int stringOffsets[] = new int[count]; 132 133 //System.out.println("reading " + count + " strings"); 134 135 seek(mHeaderItem.stringIdsOff); 136 for (int i = 0; i < count; i++) { 137 stringOffsets[i] = readInt(); 138 } 139 140 mStrings = new String[count]; 141 142 seek(stringOffsets[0]); 143 for (int i = 0; i < count; i++) { 144 seek(stringOffsets[i]); // should be a no-op 145 mStrings[i] = readString(); 146 //System.out.println("STR: " + i + ": " + mStrings[i]); 147 } 148 } 149 150 /** 151 * Loads the type ID list. 152 */ 153 void loadTypeIds() throws IOException { 154 int count = mHeaderItem.typeIdsSize; 155 mTypeIds = new TypeIdItem[count]; 156 157 //System.out.println("reading " + count + " typeIds"); 158 seek(mHeaderItem.typeIdsOff); 159 for (int i = 0; i < count; i++) { 160 mTypeIds[i] = new TypeIdItem(); 161 mTypeIds[i].descriptorIdx = readInt(); 162 163 //System.out.println(i + ": " + mTypeIds[i].descriptorIdx + 164 // " " + mStrings[mTypeIds[i].descriptorIdx]); 165 } 166 } 167 168 /** 169 * Loads the proto ID list. 170 */ 171 void loadProtoIds() throws IOException { 172 int count = mHeaderItem.protoIdsSize; 173 mProtoIds = new ProtoIdItem[count]; 174 175 //System.out.println("reading " + count + " protoIds"); 176 seek(mHeaderItem.protoIdsOff); 177 178 /* 179 * Read the proto ID items. 180 */ 181 for (int i = 0; i < count; i++) { 182 mProtoIds[i] = new ProtoIdItem(); 183 mProtoIds[i].shortyIdx = readInt(); 184 mProtoIds[i].returnTypeIdx = readInt(); 185 mProtoIds[i].parametersOff = readInt(); 186 187 //System.out.println(i + ": " + mProtoIds[i].shortyIdx + 188 // " " + mStrings[mProtoIds[i].shortyIdx]); 189 } 190 191 /* 192 * Go back through and read the type lists. 193 */ 194 for (int i = 0; i < count; i++) { 195 ProtoIdItem protoId = mProtoIds[i]; 196 197 int offset = protoId.parametersOff; 198 199 if (offset == 0) { 200 protoId.types = new int[0]; 201 continue; 202 } else { 203 seek(offset); 204 int size = readInt(); // #of entries in list 205 protoId.types = new int[size]; 206 207 for (int j = 0; j < size; j++) { 208 protoId.types[j] = readShort() & 0xffff; 209 } 210 } 211 } 212 } 213 214 /** 215 * Loads the field ID list. 216 */ 217 void loadFieldIds() throws IOException { 218 int count = mHeaderItem.fieldIdsSize; 219 mFieldIds = new FieldIdItem[count]; 220 221 //System.out.println("reading " + count + " fieldIds"); 222 seek(mHeaderItem.fieldIdsOff); 223 for (int i = 0; i < count; i++) { 224 mFieldIds[i] = new FieldIdItem(); 225 mFieldIds[i].classIdx = readShort() & 0xffff; 226 mFieldIds[i].typeIdx = readShort() & 0xffff; 227 mFieldIds[i].nameIdx = readInt(); 228 229 //System.out.println(i + ": " + mFieldIds[i].nameIdx + 230 // " " + mStrings[mFieldIds[i].nameIdx]); 231 } 232 } 233 234 /** 235 * Loads the method ID list. 236 */ 237 void loadMethodIds() throws IOException { 238 int count = mHeaderItem.methodIdsSize; 239 mMethodIds = new MethodIdItem[count]; 240 241 //System.out.println("reading " + count + " methodIds"); 242 seek(mHeaderItem.methodIdsOff); 243 for (int i = 0; i < count; i++) { 244 mMethodIds[i] = new MethodIdItem(); 245 mMethodIds[i].classIdx = readShort() & 0xffff; 246 mMethodIds[i].protoIdx = readShort() & 0xffff; 247 mMethodIds[i].nameIdx = readInt(); 248 249 //System.out.println(i + ": " + mMethodIds[i].nameIdx + 250 // " " + mStrings[mMethodIds[i].nameIdx]); 251 } 252 } 253 254 /** 255 * Loads the class defs list. 256 */ 257 void loadClassDefs() throws IOException { 258 int count = mHeaderItem.classDefsSize; 259 mClassDefs = new ClassDefItem[count]; 260 261 //System.out.println("reading " + count + " classDefs"); 262 seek(mHeaderItem.classDefsOff); 263 for (int i = 0; i < count; i++) { 264 mClassDefs[i] = new ClassDefItem(); 265 mClassDefs[i].classIdx = readInt(); 266 267 /* access_flags = */ readInt(); 268 /* superclass_idx = */ readInt(); 269 /* interfaces_off = */ readInt(); 270 /* source_file_idx = */ readInt(); 271 /* annotations_off = */ readInt(); 272 /* class_data_off = */ readInt(); 273 /* static_values_off = */ readInt(); 274 275 //System.out.println(i + ": " + mClassDefs[i].classIdx + " " + 276 // mStrings[mTypeIds[mClassDefs[i].classIdx].descriptorIdx]); 277 } 278 } 279 280 /** 281 * Sets the "internal" flag on type IDs which are defined in the 282 * DEX file or within the VM (e.g. primitive classes and arrays). 283 */ 284 void markInternalClasses() { 285 for (int i = mClassDefs.length -1; i >= 0; i--) { 286 mTypeIds[mClassDefs[i].classIdx].internal = true; 287 } 288 289 for (int i = 0; i < mTypeIds.length; i++) { 290 String className = mStrings[mTypeIds[i].descriptorIdx]; 291 292 if (className.length() == 1) { 293 // primitive class 294 mTypeIds[i].internal = true; 295 } else if (className.charAt(0) == '[') { 296 mTypeIds[i].internal = true; 297 } 298 299 //System.out.println(i + " " + 300 // (mTypeIds[i].internal ? "INTERNAL" : "external") + " - " + 301 // mStrings[mTypeIds[i].descriptorIdx]); 302 } 303 } 304 305 306 /* 307 * ======================================================================= 308 * Queries 309 * ======================================================================= 310 */ 311 312 /** 313 * Converts a single-character primitive type into its human-readable 314 * equivalent. 315 */ 316 private String primitiveTypeLabel(char typeChar) { 317 /* primitive type; substitute human-readable name in */ 318 switch (typeChar) { 319 case 'B': return "byte"; 320 case 'C': return "char"; 321 case 'D': return "double"; 322 case 'F': return "float"; 323 case 'I': return "int"; 324 case 'J': return "long"; 325 case 'S': return "short"; 326 case 'V': return "void"; 327 case 'Z': return "boolean"; 328 default: 329 /* huh? */ 330 System.err.println("Unexpected class char " + typeChar); 331 assert false; 332 return "UNKNOWN"; 333 } 334 } 335 336 /** 337 * Converts a descriptor to dotted form. For example, 338 * "Ljava/lang/String;" becomes "java.lang.String", and "[I" becomes 339 * "int[]. 340 */ 341 private String descriptorToDot(String descr) { 342 int targetLen = descr.length(); 343 int offset = 0; 344 int arrayDepth = 0; 345 346 /* strip leading [s; will be added to end */ 347 while (targetLen > 1 && descr.charAt(offset) == '[') { 348 offset++; 349 targetLen--; 350 } 351 arrayDepth = offset; 352 353 if (targetLen == 1) { 354 descr = primitiveTypeLabel(descr.charAt(offset)); 355 offset = 0; 356 targetLen = descr.length(); 357 } else { 358 /* account for leading 'L' and trailing ';' */ 359 if (targetLen >= 2 && descr.charAt(offset) == 'L' && 360 descr.charAt(offset+targetLen-1) == ';') 361 { 362 targetLen -= 2; /* two fewer chars to copy */ 363 offset++; /* skip the 'L' */ 364 } 365 } 366 367 char[] buf = new char[targetLen + arrayDepth * 2]; 368 369 /* copy class name over */ 370 int i; 371 for (i = 0; i < targetLen; i++) { 372 char ch = descr.charAt(offset + i); 373 buf[i] = (ch == '/') ? '.' : ch; 374 } 375 376 /* add the appopriate number of brackets for arrays */ 377 while (arrayDepth-- > 0) { 378 buf[i++] = '['; 379 buf[i++] = ']'; 380 } 381 assert i == buf.length; 382 383 return new String(buf); 384 } 385 386 /** 387 * Returns the dot-form class name, given an index into the type_ids 388 * table. 389 */ 390 private String classNameFromTypeIndex(int idx) { 391 String descriptor = mStrings[mTypeIds[idx].descriptorIdx]; 392 return descriptorToDot(descriptor); 393 } 394 395 /** 396 * Returns the method prototype descriptor, given an index into the 397 * proto_ids table. 398 */ 399 private String protoStringFromProtoIndex(int idx) { 400 StringBuilder builder = new StringBuilder(); 401 ProtoIdItem protoId = mProtoIds[idx]; 402 403 builder.append("("); 404 for (int i = 0; i < protoId.types.length; i++) { 405 String elem = mStrings[mTypeIds[protoId.types[i]].descriptorIdx]; 406 builder.append(elem); 407 } 408 409 builder.append(")"); 410 String ret = mStrings[mTypeIds[protoId.returnTypeIdx].descriptorIdx]; 411 builder.append(ret); 412 413 return builder.toString(); 414 } 415 416 /** 417 * Returns an array with all of the field references that don't 418 * correspond to classes in the DEX file. 419 */ 420 public FieldRef[] getExternalFieldReferences() { 421 // get a count 422 int count = 0; 423 for (int i = 0; i < mFieldIds.length; i++) { 424 if (!mTypeIds[mFieldIds[i].classIdx].internal) 425 count++; 426 } 427 428 //System.out.println("count is " + count + " of " + mFieldIds.length); 429 430 FieldRef[] fieldRefs = new FieldRef[count]; 431 count = 0; 432 for (int i = 0; i < mFieldIds.length; i++) { 433 if (!mTypeIds[mFieldIds[i].classIdx].internal) { 434 FieldIdItem fieldId = mFieldIds[i]; 435 fieldRefs[count++] = 436 new FieldRef(classNameFromTypeIndex(fieldId.classIdx), 437 classNameFromTypeIndex(fieldId.typeIdx), 438 mStrings[fieldId.nameIdx]); 439 } 440 } 441 442 assert count == fieldRefs.length; 443 444 return fieldRefs; 445 } 446 447 /** 448 * Returns an array with all of the method references that don't 449 * correspond to classes in the DEX file. 450 */ 451 public MethodRef[] getExternalMethodReferences() { 452 // get a count 453 int count = 0; 454 for (int i = 0; i < mMethodIds.length; i++) { 455 if (!mTypeIds[mMethodIds[i].classIdx].internal) 456 count++; 457 } 458 459 //System.out.println("count is " + count + " of " + mMethodIds.length); 460 461 MethodRef[] methodRefs = new MethodRef[count]; 462 count = 0; 463 for (int i = 0; i < mMethodIds.length; i++) { 464 if (!mTypeIds[mMethodIds[i].classIdx].internal) { 465 MethodIdItem methodId = mMethodIds[i]; 466 methodRefs[count++] = 467 new MethodRef(classNameFromTypeIndex(methodId.classIdx), 468 protoStringFromProtoIndex(methodId.protoIdx), 469 mStrings[methodId.nameIdx]); 470 } 471 } 472 473 assert count == methodRefs.length; 474 475 return methodRefs; 476 } 477 478 /* 479 * ======================================================================= 480 * Basic I/O functions 481 * ======================================================================= 482 */ 483 484 /** 485 * Seeks the DEX file to the specified absolute position. 486 */ 487 void seek(int position) throws IOException { 488 mDexFile.seek(position); 489 } 490 491 /** 492 * Fills the buffer by reading bytes from the DEX file. 493 */ 494 void readBytes(byte[] buffer) throws IOException { 495 mDexFile.readFully(buffer); 496 } 497 498 /** 499 * Reads a single signed byte value. 500 */ 501 byte readByte() throws IOException { 502 mDexFile.readFully(tmpBuf, 0, 1); 503 return tmpBuf[0]; 504 } 505 506 /** 507 * Reads a signed 16-bit integer, byte-swapping if necessary. 508 */ 509 short readShort() throws IOException { 510 mDexFile.readFully(tmpBuf, 0, 2); 511 if (isBigEndian) { 512 return (short) ((tmpBuf[1] & 0xff) | ((tmpBuf[0] & 0xff) << 8)); 513 } else { 514 return (short) ((tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8)); 515 } 516 } 517 518 /** 519 * Reads a signed 32-bit integer, byte-swapping if necessary. 520 */ 521 int readInt() throws IOException { 522 mDexFile.readFully(tmpBuf, 0, 4); 523 524 if (isBigEndian) { 525 return (tmpBuf[3] & 0xff) | ((tmpBuf[2] & 0xff) << 8) | 526 ((tmpBuf[1] & 0xff) << 16) | ((tmpBuf[0] & 0xff) << 24); 527 } else { 528 return (tmpBuf[0] & 0xff) | ((tmpBuf[1] & 0xff) << 8) | 529 ((tmpBuf[2] & 0xff) << 16) | ((tmpBuf[3] & 0xff) << 24); 530 } 531 } 532 533 /** 534 * Reads a variable-length unsigned LEB128 value. Does not attempt to 535 * verify that the value is valid. 536 * 537 * @throws EOFException if we run off the end of the file 538 */ 539 int readUnsignedLeb128() throws IOException { 540 int result = 0; 541 byte val; 542 543 do { 544 val = readByte(); 545 result = (result << 7) | (val & 0x7f); 546 } while (val < 0); 547 548 return result; 549 } 550 551 /** 552 * Reads a UTF-8 string. 553 * 554 * We don't know how long the UTF-8 string is, so we have to read one 555 * byte at a time. We could make an educated guess based on the 556 * utf16_size and seek back if we get it wrong, but seeking backward 557 * may cause the underlying implementation to reload I/O buffers. 558 */ 559 String readString() throws IOException { 560 int utf16len = readUnsignedLeb128(); 561 byte inBuf[] = new byte[utf16len * 3]; // worst case 562 int idx; 563 564 for (idx = 0; idx < inBuf.length; idx++) { 565 byte val = readByte(); 566 if (val == 0) 567 break; 568 inBuf[idx] = val; 569 } 570 571 return new String(inBuf, 0, idx, "UTF-8"); 572 } 573 574 575 /* 576 * ======================================================================= 577 * Internal "structure" declarations 578 * ======================================================================= 579 */ 580 581 /** 582 * Holds the contents of a header_item. 583 */ 584 static class HeaderItem { 585 public int fileSize; 586 public int headerSize; 587 public int endianTag; 588 public int stringIdsSize, stringIdsOff; 589 public int typeIdsSize, typeIdsOff; 590 public int protoIdsSize, protoIdsOff; 591 public int fieldIdsSize, fieldIdsOff; 592 public int methodIdsSize, methodIdsOff; 593 public int classDefsSize, classDefsOff; 594 595 /* expected magic values */ 596 public static final byte[] DEX_FILE_MAGIC = { 597 0x64, 0x65, 0x78, 0x0a, 0x30, 0x33, 0x35, 0x00 }; 598 public static final int ENDIAN_CONSTANT = 0x12345678; 599 public static final int REVERSE_ENDIAN_CONSTANT = 0x78563412; 600 } 601 602 /** 603 * Holds the contents of a type_id_item. 604 * 605 * This is chiefly a list of indices into the string table. We need 606 * some additional bits of data, such as whether or not the type ID 607 * represents a class defined in this DEX, so we use an object for 608 * each instead of a simple integer. (Could use a parallel array, but 609 * since this is a desktop app it's not essential.) 610 */ 611 static class TypeIdItem { 612 public int descriptorIdx; // index into string_ids 613 614 public boolean internal; // defined within this DEX file? 615 } 616 617 /** 618 * Holds the contents of a proto_id_item. 619 */ 620 static class ProtoIdItem { 621 public int shortyIdx; // index into string_ids 622 public int returnTypeIdx; // index into type_ids 623 public int parametersOff; // file offset to a type_list 624 625 public int types[]; // contents of type list 626 } 627 628 /** 629 * Holds the contents of a field_id_item. 630 */ 631 static class FieldIdItem { 632 public int classIdx; // index into type_ids (defining class) 633 public int typeIdx; // index into type_ids (field type) 634 public int nameIdx; // index into string_ids 635 } 636 637 /** 638 * Holds the contents of a method_id_item. 639 */ 640 static class MethodIdItem { 641 public int classIdx; // index into type_ids 642 public int protoIdx; // index into proto_ids 643 public int nameIdx; // index into string_ids 644 } 645 646 /** 647 * Holds the contents of a class_def_item. 648 * 649 * We don't really need a class for this, but there's some stuff in 650 * the class_def_item that we might want later. 651 */ 652 static class ClassDefItem { 653 public int classIdx; // index into type_ids 654 } 655} 656 657