DexFile.cpp revision a70a3d8faa8f7332549fa0c9ae2008d428e28606
1/* 2 * Copyright (C) 2008 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17/* 18 * Access the contents of a .dex file. 19 */ 20 21#include "DexFile.h" 22#include "DexOptData.h" 23#include "DexProto.h" 24#include "DexCatch.h" 25#include "Leb128.h" 26#include "sha1.h" 27#include "ZipArchive.h" 28 29#include <zlib.h> 30 31#include <stdlib.h> 32#include <stddef.h> 33#include <string.h> 34#include <fcntl.h> 35#include <errno.h> 36 37 38/* 39 * Verifying checksums is good, but it slows things down and causes us to 40 * touch every page. In the "optimized" world, it doesn't work at all, 41 * because we rewrite the contents. 42 */ 43static const bool kVerifyChecksum = false; 44static const bool kVerifySignature = false; 45 46/* (documented in header) */ 47char dexGetPrimitiveTypeDescriptorChar(PrimitiveType type) { 48 const char* string = dexGetPrimitiveTypeDescriptor(type); 49 50 return (string == NULL) ? '\0' : string[0]; 51} 52 53/* (documented in header) */ 54const char* dexGetPrimitiveTypeDescriptor(PrimitiveType type) { 55 switch (type) { 56 case PRIM_VOID: return "V"; 57 case PRIM_BOOLEAN: return "Z"; 58 case PRIM_BYTE: return "B"; 59 case PRIM_SHORT: return "S"; 60 case PRIM_CHAR: return "C"; 61 case PRIM_INT: return "I"; 62 case PRIM_LONG: return "J"; 63 case PRIM_FLOAT: return "F"; 64 case PRIM_DOUBLE: return "D"; 65 default: return NULL; 66 } 67 68 return NULL; 69} 70 71/* (documented in header) */ 72const char* dexGetBoxedTypeDescriptor(PrimitiveType type) { 73 switch (type) { 74 case PRIM_VOID: return NULL; 75 case PRIM_BOOLEAN: return "Ljava/lang/Boolean;"; 76 case PRIM_BYTE: return "Ljava/lang/Byte;"; 77 case PRIM_SHORT: return "Ljava/lang/Short;"; 78 case PRIM_CHAR: return "Ljava/lang/Character;"; 79 case PRIM_INT: return "Ljava/lang/Integer;"; 80 case PRIM_LONG: return "Ljava/lang/Long;"; 81 case PRIM_FLOAT: return "Ljava/lang/Float;"; 82 case PRIM_DOUBLE: return "Ljava/lang/Double;"; 83 default: return NULL; 84 } 85} 86 87/* (documented in header) */ 88PrimitiveType dexGetPrimitiveTypeFromDescriptorChar(char descriptorChar) { 89 switch (descriptorChar) { 90 case 'V': return PRIM_VOID; 91 case 'Z': return PRIM_BOOLEAN; 92 case 'B': return PRIM_BYTE; 93 case 'S': return PRIM_SHORT; 94 case 'C': return PRIM_CHAR; 95 case 'I': return PRIM_INT; 96 case 'J': return PRIM_LONG; 97 case 'F': return PRIM_FLOAT; 98 case 'D': return PRIM_DOUBLE; 99 default: return PRIM_NOT; 100 } 101} 102 103/* Return the UTF-8 encoded string with the specified string_id index, 104 * also filling in the UTF-16 size (number of 16-bit code points).*/ 105const char* dexStringAndSizeById(const DexFile* pDexFile, u4 idx, 106 u4* utf16Size) { 107 const DexStringId* pStringId = dexGetStringId(pDexFile, idx); 108 const u1* ptr = pDexFile->baseAddr + pStringId->stringDataOff; 109 110 *utf16Size = readUnsignedLeb128(&ptr); 111 return (const char*) ptr; 112} 113 114/* 115 * Format an SHA-1 digest for printing. tmpBuf must be able to hold at 116 * least kSHA1DigestOutputLen bytes. 117 */ 118const char* dvmSHA1DigestToStr(const unsigned char digest[], char* tmpBuf); 119 120/* 121 * Compute a SHA-1 digest on a range of bytes. 122 */ 123static void dexComputeSHA1Digest(const unsigned char* data, size_t length, 124 unsigned char digest[]) 125{ 126 SHA1_CTX context; 127 SHA1Init(&context); 128 SHA1Update(&context, data, length); 129 SHA1Final(digest, &context); 130} 131 132/* 133 * Format the SHA-1 digest into the buffer, which must be able to hold at 134 * least kSHA1DigestOutputLen bytes. Returns a pointer to the buffer, 135 */ 136static const char* dexSHA1DigestToStr(const unsigned char digest[],char* tmpBuf) 137{ 138 static const char hexDigit[] = "0123456789abcdef"; 139 char* cp; 140 int i; 141 142 cp = tmpBuf; 143 for (i = 0; i < kSHA1DigestLen; i++) { 144 *cp++ = hexDigit[digest[i] >> 4]; 145 *cp++ = hexDigit[digest[i] & 0x0f]; 146 } 147 *cp++ = '\0'; 148 149 assert(cp == tmpBuf + kSHA1DigestOutputLen); 150 151 return tmpBuf; 152} 153 154/* 155 * Compute a hash code on a UTF-8 string, for use with internal hash tables. 156 * 157 * This may or may not be compatible with UTF-8 hash functions used inside 158 * the Dalvik VM. 159 * 160 * The basic "multiply by 31 and add" approach does better on class names 161 * than most other things tried (e.g. adler32). 162 */ 163static u4 classDescriptorHash(const char* str) 164{ 165 u4 hash = 1; 166 167 while (*str != '\0') 168 hash = hash * 31 + *str++; 169 170 return hash; 171} 172 173/* 174 * Add an entry to the class lookup table. We hash the string and probe 175 * until we find an open slot. 176 */ 177static void classLookupAdd(DexFile* pDexFile, DexClassLookup* pLookup, 178 int stringOff, int classDefOff, int* pNumProbes) 179{ 180 const char* classDescriptor = 181 (const char*) (pDexFile->baseAddr + stringOff); 182 const DexClassDef* pClassDef = 183 (const DexClassDef*) (pDexFile->baseAddr + classDefOff); 184 u4 hash = classDescriptorHash(classDescriptor); 185 int mask = pLookup->numEntries-1; 186 int idx = hash & mask; 187 188 /* 189 * Find the first empty slot. We oversized the table, so this is 190 * guaranteed to finish. 191 */ 192 int probes = 0; 193 while (pLookup->table[idx].classDescriptorOffset != 0) { 194 idx = (idx + 1) & mask; 195 probes++; 196 } 197 //if (probes > 1) 198 // LOGW("classLookupAdd: probes=%d\n", probes); 199 200 pLookup->table[idx].classDescriptorHash = hash; 201 pLookup->table[idx].classDescriptorOffset = stringOff; 202 pLookup->table[idx].classDefOffset = classDefOff; 203 *pNumProbes = probes; 204} 205 206/* 207 * Create the class lookup hash table. 208 * 209 * Returns newly-allocated storage. 210 */ 211DexClassLookup* dexCreateClassLookup(DexFile* pDexFile) 212{ 213 DexClassLookup* pLookup; 214 int allocSize; 215 int i, numEntries; 216 int numProbes, totalProbes, maxProbes; 217 218 numProbes = totalProbes = maxProbes = 0; 219 220 assert(pDexFile != NULL); 221 222 /* 223 * Using a factor of 3 results in far less probing than a factor of 2, 224 * but almost doubles the flash storage requirements for the bootstrap 225 * DEX files. The overall impact on class loading performance seems 226 * to be minor. We could probably get some performance improvement by 227 * using a secondary hash. 228 */ 229 numEntries = dexRoundUpPower2(pDexFile->pHeader->classDefsSize * 2); 230 allocSize = offsetof(DexClassLookup, table) 231 + numEntries * sizeof(pLookup->table[0]); 232 233 pLookup = (DexClassLookup*) calloc(1, allocSize); 234 if (pLookup == NULL) 235 return NULL; 236 pLookup->size = allocSize; 237 pLookup->numEntries = numEntries; 238 239 for (i = 0; i < (int)pDexFile->pHeader->classDefsSize; i++) { 240 const DexClassDef* pClassDef; 241 const char* pString; 242 243 pClassDef = dexGetClassDef(pDexFile, i); 244 pString = dexStringByTypeIdx(pDexFile, pClassDef->classIdx); 245 246 classLookupAdd(pDexFile, pLookup, 247 (u1*)pString - pDexFile->baseAddr, 248 (u1*)pClassDef - pDexFile->baseAddr, &numProbes); 249 250 if (numProbes > maxProbes) 251 maxProbes = numProbes; 252 totalProbes += numProbes; 253 } 254 255 LOGV("Class lookup: classes=%d slots=%d (%d%% occ) alloc=%d" 256 " total=%d max=%d\n", 257 pDexFile->pHeader->classDefsSize, numEntries, 258 (100 * pDexFile->pHeader->classDefsSize) / numEntries, 259 allocSize, totalProbes, maxProbes); 260 261 return pLookup; 262} 263 264 265/* 266 * Set up the basic raw data pointers of a DexFile. This function isn't 267 * meant for general use. 268 */ 269void dexFileSetupBasicPointers(DexFile* pDexFile, const u1* data) { 270 DexHeader *pHeader = (DexHeader*) data; 271 272 pDexFile->baseAddr = data; 273 pDexFile->pHeader = pHeader; 274 pDexFile->pStringIds = (const DexStringId*) (data + pHeader->stringIdsOff); 275 pDexFile->pTypeIds = (const DexTypeId*) (data + pHeader->typeIdsOff); 276 pDexFile->pFieldIds = (const DexFieldId*) (data + pHeader->fieldIdsOff); 277 pDexFile->pMethodIds = (const DexMethodId*) (data + pHeader->methodIdsOff); 278 pDexFile->pProtoIds = (const DexProtoId*) (data + pHeader->protoIdsOff); 279 pDexFile->pClassDefs = (const DexClassDef*) (data + pHeader->classDefsOff); 280 pDexFile->pLinkData = (const DexLink*) (data + pHeader->linkOff); 281} 282 283/* 284 * Parse an optimized or unoptimized .dex file sitting in memory. This is 285 * called after the byte-ordering and structure alignment has been fixed up. 286 * 287 * On success, return a newly-allocated DexFile. 288 */ 289DexFile* dexFileParse(const u1* data, size_t length, int flags) 290{ 291 DexFile* pDexFile = NULL; 292 const DexHeader* pHeader; 293 const u1* magic; 294 int result = -1; 295 296 if (length < sizeof(DexHeader)) { 297 LOGE("too short to be a valid .dex\n"); 298 goto bail; /* bad file format */ 299 } 300 301 pDexFile = (DexFile*) malloc(sizeof(DexFile)); 302 if (pDexFile == NULL) 303 goto bail; /* alloc failure */ 304 memset(pDexFile, 0, sizeof(DexFile)); 305 306 /* 307 * Peel off the optimized header. 308 */ 309 if (memcmp(data, DEX_OPT_MAGIC, 4) == 0) { 310 magic = data; 311 if (memcmp(magic+4, DEX_OPT_MAGIC_VERS, 4) != 0) { 312 LOGE("bad opt version (0x%02x %02x %02x %02x)\n", 313 magic[4], magic[5], magic[6], magic[7]); 314 goto bail; 315 } 316 317 pDexFile->pOptHeader = (const DexOptHeader*) data; 318 LOGV("Good opt header, DEX offset is %d, flags=0x%02x\n", 319 pDexFile->pOptHeader->dexOffset, pDexFile->pOptHeader->flags); 320 321 /* parse the optimized dex file tables */ 322 if (!dexParseOptData(data, length, pDexFile)) 323 goto bail; 324 325 /* ignore the opt header and appended data from here on out */ 326 data += pDexFile->pOptHeader->dexOffset; 327 length -= pDexFile->pOptHeader->dexOffset; 328 if (pDexFile->pOptHeader->dexLength > length) { 329 LOGE("File truncated? stored len=%d, rem len=%d\n", 330 pDexFile->pOptHeader->dexLength, (int) length); 331 goto bail; 332 } 333 length = pDexFile->pOptHeader->dexLength; 334 } 335 336 dexFileSetupBasicPointers(pDexFile, data); 337 pHeader = pDexFile->pHeader; 338 339 magic = pHeader->magic; 340 if (memcmp(magic, DEX_MAGIC, 4) != 0) { 341 /* not expected */ 342 LOGE("bad magic number (0x%02x %02x %02x %02x)\n", 343 magic[0], magic[1], magic[2], magic[3]); 344 goto bail; 345 } 346 if (memcmp(magic+4, DEX_MAGIC_VERS, 4) != 0) { 347 LOGE("bad dex version (0x%02x %02x %02x %02x)\n", 348 magic[4], magic[5], magic[6], magic[7]); 349 goto bail; 350 } 351 352 /* 353 * Verify the checksum(s). This is reasonably quick, but does require 354 * touching every byte in the DEX file. The base checksum changes after 355 * byte-swapping and DEX optimization. 356 */ 357 if (flags & kDexParseVerifyChecksum) { 358 u4 adler = dexComputeChecksum(pHeader); 359 if (adler != pHeader->checksum) { 360 LOGE("ERROR: bad checksum (%08x vs %08x)\n", 361 adler, pHeader->checksum); 362 if (!(flags & kDexParseContinueOnError)) 363 goto bail; 364 } else { 365 LOGV("+++ adler32 checksum (%08x) verified\n", adler); 366 } 367 368 const DexOptHeader* pOptHeader = pDexFile->pOptHeader; 369 if (pOptHeader != NULL) { 370 adler = dexComputeOptChecksum(pOptHeader); 371 if (adler != pOptHeader->checksum) { 372 LOGE("ERROR: bad opt checksum (%08x vs %08x)\n", 373 adler, pOptHeader->checksum); 374 if (!(flags & kDexParseContinueOnError)) 375 goto bail; 376 } else { 377 LOGV("+++ adler32 opt checksum (%08x) verified\n", adler); 378 } 379 } 380 } 381 382 /* 383 * Verify the SHA-1 digest. (Normally we don't want to do this -- 384 * the digest is used to uniquely identify the original DEX file, and 385 * can't be computed for verification after the DEX is byte-swapped 386 * and optimized.) 387 */ 388 if (kVerifySignature) { 389 unsigned char sha1Digest[kSHA1DigestLen]; 390 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum) + 391 kSHA1DigestLen; 392 393 dexComputeSHA1Digest(data + nonSum, length - nonSum, sha1Digest); 394 if (memcmp(sha1Digest, pHeader->signature, kSHA1DigestLen) != 0) { 395 char tmpBuf1[kSHA1DigestOutputLen]; 396 char tmpBuf2[kSHA1DigestOutputLen]; 397 LOGE("ERROR: bad SHA1 digest (%s vs %s)\n", 398 dexSHA1DigestToStr(sha1Digest, tmpBuf1), 399 dexSHA1DigestToStr(pHeader->signature, tmpBuf2)); 400 if (!(flags & kDexParseContinueOnError)) 401 goto bail; 402 } else { 403 LOGV("+++ sha1 digest verified\n"); 404 } 405 } 406 407 if (pHeader->fileSize != length) { 408 LOGE("ERROR: stored file size (%d) != expected (%d)\n", 409 (int) pHeader->fileSize, (int) length); 410 if (!(flags & kDexParseContinueOnError)) 411 goto bail; 412 } 413 414 if (pHeader->classDefsSize == 0) { 415 LOGE("ERROR: DEX file has no classes in it, failing\n"); 416 goto bail; 417 } 418 419 /* 420 * Success! 421 */ 422 result = 0; 423 424bail: 425 if (result != 0 && pDexFile != NULL) { 426 dexFileFree(pDexFile); 427 pDexFile = NULL; 428 } 429 return pDexFile; 430} 431 432/* 433 * Free up the DexFile and any associated data structures. 434 * 435 * Note we may be called with a partially-initialized DexFile. 436 */ 437void dexFileFree(DexFile* pDexFile) 438{ 439 if (pDexFile == NULL) 440 return; 441 442 free(pDexFile); 443} 444 445/* 446 * Look up a class definition entry by descriptor. 447 * 448 * "descriptor" should look like "Landroid/debug/Stuff;". 449 */ 450const DexClassDef* dexFindClass(const DexFile* pDexFile, 451 const char* descriptor) 452{ 453 const DexClassLookup* pLookup = pDexFile->pClassLookup; 454 u4 hash; 455 int idx, mask; 456 457 hash = classDescriptorHash(descriptor); 458 mask = pLookup->numEntries - 1; 459 idx = hash & mask; 460 461 /* 462 * Search until we find a matching entry or an empty slot. 463 */ 464 while (true) { 465 int offset; 466 467 offset = pLookup->table[idx].classDescriptorOffset; 468 if (offset == 0) 469 return NULL; 470 471 if (pLookup->table[idx].classDescriptorHash == hash) { 472 const char* str; 473 474 str = (const char*) (pDexFile->baseAddr + offset); 475 if (strcmp(str, descriptor) == 0) { 476 return (const DexClassDef*) 477 (pDexFile->baseAddr + pLookup->table[idx].classDefOffset); 478 } 479 } 480 481 idx = (idx + 1) & mask; 482 } 483} 484 485 486/* 487 * Compute the DEX file checksum for a memory-mapped DEX file. 488 */ 489u4 dexComputeChecksum(const DexHeader* pHeader) 490{ 491 const u1* start = (const u1*) pHeader; 492 493 uLong adler = adler32(0L, Z_NULL, 0); 494 const int nonSum = sizeof(pHeader->magic) + sizeof(pHeader->checksum); 495 496 return (u4) adler32(adler, start + nonSum, pHeader->fileSize - nonSum); 497} 498 499/* 500 * Compute the size, in bytes, of a DexCode. 501 */ 502size_t dexGetDexCodeSize(const DexCode* pCode) 503{ 504 /* 505 * The catch handler data is the last entry. It has a variable number 506 * of variable-size pieces, so we need to create an iterator. 507 */ 508 u4 handlersSize; 509 u4 offset; 510 u4 ui; 511 512 if (pCode->triesSize != 0) { 513 handlersSize = dexGetHandlersSize(pCode); 514 offset = dexGetFirstHandlerOffset(pCode); 515 } else { 516 handlersSize = 0; 517 offset = 0; 518 } 519 520 for (ui = 0; ui < handlersSize; ui++) { 521 DexCatchIterator iterator; 522 dexCatchIteratorInit(&iterator, pCode, offset); 523 offset = dexCatchIteratorGetEndOffset(&iterator, pCode); 524 } 525 526 const u1* handlerData = dexGetCatchHandlerData(pCode); 527 528 //LOGD("+++ pCode=%p handlerData=%p last offset=%d\n", 529 // pCode, handlerData, offset); 530 531 /* return the size of the catch handler + everything before it */ 532 return (handlerData - (u1*) pCode) + offset; 533} 534 535/* 536 * Round up to the next highest power of 2. 537 * 538 * Found on http://graphics.stanford.edu/~seander/bithacks.html. 539 */ 540u4 dexRoundUpPower2(u4 val) 541{ 542 val--; 543 val |= val >> 1; 544 val |= val >> 2; 545 val |= val >> 4; 546 val |= val >> 8; 547 val |= val >> 16; 548 val++; 549 550 return val; 551} 552