RegisterMap.cpp revision c1a4ab9c313d8a3d12007f2dbef7b5a6fa4ac2ef
1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * This code generate "register maps" for Dalvik bytecode.  In a stack-based
19 * VM we might call these "stack maps".  They are used to increase the
20 * precision in the garbage collector when scanning references in the
21 * interpreter thread stacks.
22 */
23#include "Dalvik.h"
24#include "UniquePtr.h"
25#include "analysis/CodeVerify.h"
26#include "analysis/RegisterMap.h"
27#include "libdex/DexCatch.h"
28#include "libdex/InstrUtils.h"
29#include "libdex/Leb128.h"
30
31#include <stddef.h>
32
33/* double-check the compression */
34#define REGISTER_MAP_VERIFY     false
35
36/* verbose logging */
37#define REGISTER_MAP_VERBOSE    false
38
39//#define REGISTER_MAP_STATS
40
41// fwd
42static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data);
43static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap);
44static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2);
45
46#ifdef REGISTER_MAP_STATS
47static void computeMapStats(RegisterMap* pMap, const Method* method);
48#endif
49static RegisterMap* compressMapDifferential(const RegisterMap* pMap,\
50    const Method* meth);
51static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap);
52
53#ifdef REGISTER_MAP_STATS
54/*
55 * Generate some statistics on the register maps we create and use.
56 */
57#define kMaxGcPointGap      50
58#define kUpdatePosnMinRegs  24
59#define kNumUpdatePosns     8
60#define kMaxDiffBits        20
61struct MapStats {
62    /*
63     * Buckets measuring the distance between GC points.  This tells us how
64     * many bits we need to encode the advancing program counter.  We ignore
65     * some of the "long tail" entries.
66     */
67    int gcPointGap[kMaxGcPointGap];
68
69    /*
70     * Number of gaps.  Equal to (number of gcPoints - number of methods),
71     * since the computation isn't including the initial gap.
72     */
73    int gcGapCount;
74
75    /*
76     * Number of gaps.
77     */
78    int totalGcPointCount;
79
80    /*
81     * For larger methods (>= 24 registers), measure in which octant register
82     * updates occur.  This should help us understand whether register
83     * changes tend to cluster in the low regs even for large methods.
84     */
85    int updatePosn[kNumUpdatePosns];
86
87    /*
88     * For all methods, count up the number of changes to registers < 16
89     * and >= 16.
90     */
91    int updateLT16;
92    int updateGE16;
93
94    /*
95     * Histogram of the number of bits that differ between adjacent entries.
96     */
97    int numDiffBits[kMaxDiffBits];
98
99
100    /*
101     * Track the number of expanded maps, and the heap space required to
102     * hold them.
103     */
104    int numExpandedMaps;
105    int totalExpandedMapSize;
106};
107#endif
108
109/*
110 * Prepare some things.
111 */
112bool dvmRegisterMapStartup()
113{
114#ifdef REGISTER_MAP_STATS
115    MapStats* pStats = calloc(1, sizeof(MapStats));
116    gDvm.registerMapStats = pStats;
117#endif
118    return true;
119}
120
121/*
122 * Clean up.
123 */
124void dvmRegisterMapShutdown()
125{
126#ifdef REGISTER_MAP_STATS
127    free(gDvm.registerMapStats);
128#endif
129}
130
131/*
132 * Write stats to log file.
133 */
134void dvmRegisterMapDumpStats()
135{
136#ifdef REGISTER_MAP_STATS
137    MapStats* pStats = (MapStats*) gDvm.registerMapStats;
138    int i, end;
139
140    for (end = kMaxGcPointGap-1; end >= 0; end--) {
141        if (pStats->gcPointGap[end] != 0)
142            break;
143    }
144
145    ALOGI("Register Map gcPointGap stats (diff count=%d, total=%d):",
146        pStats->gcGapCount, pStats->totalGcPointCount);
147    assert(pStats->gcPointGap[0] == 0);
148    for (i = 1; i <= end; i++) {
149        ALOGI(" %2d %d", i, pStats->gcPointGap[i]);
150    }
151
152
153    for (end = kMaxDiffBits-1; end >= 0; end--) {
154        if (pStats->numDiffBits[end] != 0)
155            break;
156    }
157
158    ALOGI("Register Map bit difference stats:");
159    for (i = 0; i <= end; i++) {
160        ALOGI(" %2d %d", i, pStats->numDiffBits[i]);
161    }
162
163
164    ALOGI("Register Map update position stats (lt16=%d ge16=%d):",
165        pStats->updateLT16, pStats->updateGE16);
166    for (i = 0; i < kNumUpdatePosns; i++) {
167        ALOGI(" %2d %d", i, pStats->updatePosn[i]);
168    }
169#endif
170}
171
172
173/*
174 * ===========================================================================
175 *      Map generation
176 * ===========================================================================
177 */
178
179/*
180 * Generate the register map for a method that has just been verified
181 * (i.e. we're doing this as part of verification).
182 *
183 * For type-precise determination we have all the data we need, so we
184 * just need to encode it in some clever fashion.
185 *
186 * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
187 */
188RegisterMap* dvmGenerateRegisterMapV(VerifierData* vdata)
189{
190    static const int kHeaderSize = offsetof(RegisterMap, data);
191    RegisterMap* pMap = NULL;
192    RegisterMap* pResult = NULL;
193    RegisterMapFormat format;
194    u1 regWidth;
195    u1* mapData;
196    int i, bytesForAddr, gcPointCount;
197    int bufSize;
198
199    if (vdata->method->registersSize >= 2048) {
200        ALOGE("ERROR: register map can't handle %d registers",
201            vdata->method->registersSize);
202        goto bail;
203    }
204    regWidth = (vdata->method->registersSize + 7) / 8;
205
206    /*
207     * Decide if we need 8 or 16 bits to hold the address.  Strictly speaking
208     * we only need 16 bits if we actually encode an address >= 256 -- if
209     * the method has a section at the end without GC points (e.g. array
210     * data) we don't need to count it.  The situation is unusual, and
211     * detecting it requires scanning the entire method, so we don't bother.
212     */
213    if (vdata->insnsSize < 256) {
214        format = kRegMapFormatCompact8;
215        bytesForAddr = 1;
216    } else {
217        format = kRegMapFormatCompact16;
218        bytesForAddr = 2;
219    }
220
221    /*
222     * Count up the number of GC point instructions.
223     *
224     * NOTE: this does not automatically include the first instruction,
225     * since we don't count method entry as a GC point.
226     */
227    gcPointCount = 0;
228    for (i = 0; i < (int) vdata->insnsSize; i++) {
229        if (dvmInsnIsGcPoint(vdata->insnFlags, i))
230            gcPointCount++;
231    }
232    if (gcPointCount >= 65536) {
233        /* we could handle this, but in practice we don't get near this */
234        ALOGE("ERROR: register map can't handle %d gc points in one method",
235            gcPointCount);
236        goto bail;
237    }
238
239    /*
240     * Allocate a buffer to hold the map data.
241     */
242    bufSize = kHeaderSize + gcPointCount * (bytesForAddr + regWidth);
243
244    ALOGV("+++ grm: %s.%s (adr=%d gpc=%d rwd=%d bsz=%d)",
245        vdata->method->clazz->descriptor, vdata->method->name,
246        bytesForAddr, gcPointCount, regWidth, bufSize);
247
248    pMap = (RegisterMap*) malloc(bufSize);
249    dvmRegisterMapSetFormat(pMap, format);
250    dvmRegisterMapSetOnHeap(pMap, true);
251    dvmRegisterMapSetRegWidth(pMap, regWidth);
252    dvmRegisterMapSetNumEntries(pMap, gcPointCount);
253
254    /*
255     * Populate it.
256     */
257    mapData = pMap->data;
258    for (i = 0; i < (int) vdata->insnsSize; i++) {
259        if (dvmInsnIsGcPoint(vdata->insnFlags, i)) {
260            assert(vdata->registerLines[i].regTypes != NULL);
261            if (format == kRegMapFormatCompact8) {
262                *mapData++ = i;
263            } else /*kRegMapFormatCompact16*/ {
264                *mapData++ = i & 0xff;
265                *mapData++ = i >> 8;
266            }
267            outputTypeVector(vdata->registerLines[i].regTypes,
268                vdata->insnRegCount, mapData);
269            mapData += regWidth;
270        }
271    }
272
273    ALOGV("mapData=%p pMap=%p bufSize=%d", mapData, pMap, bufSize);
274    assert(mapData - (const u1*) pMap == bufSize);
275
276    if (REGISTER_MAP_VERIFY && !verifyMap(vdata, pMap))
277        goto bail;
278#ifdef REGISTER_MAP_STATS
279    computeMapStats(pMap, vdata->method);
280#endif
281
282    /*
283     * Try to compress the map.
284     */
285    RegisterMap* pCompMap;
286
287    pCompMap = compressMapDifferential(pMap, vdata->method);
288    if (pCompMap != NULL) {
289        if (REGISTER_MAP_VERIFY) {
290            /*
291             * Expand the compressed map we just created, and compare it
292             * to the original.  Abort the VM if it doesn't match up.
293             */
294            RegisterMap* pUncompMap;
295            pUncompMap = uncompressMapDifferential(pCompMap);
296            if (pUncompMap == NULL) {
297                ALOGE("Map failed to uncompress - %s.%s",
298                    vdata->method->clazz->descriptor,
299                    vdata->method->name);
300                free(pCompMap);
301                /* bad - compression is broken or we're out of memory */
302                dvmAbort();
303            } else {
304                if (compareMaps(pMap, pUncompMap) != 0) {
305                    ALOGE("Map comparison failed - %s.%s",
306                        vdata->method->clazz->descriptor,
307                        vdata->method->name);
308                    free(pCompMap);
309                    /* bad - compression is broken */
310                    dvmAbort();
311                }
312
313                /* verify succeeded */
314                delete pUncompMap;
315            }
316        }
317
318        if (REGISTER_MAP_VERBOSE) {
319            ALOGD("Good compress on %s.%s",
320                vdata->method->clazz->descriptor,
321                vdata->method->name);
322        }
323        free(pMap);
324        pMap = pCompMap;
325    } else {
326        if (REGISTER_MAP_VERBOSE) {
327            ALOGD("Unable to compress %s.%s (ent=%d rw=%d)",
328                vdata->method->clazz->descriptor,
329                vdata->method->name,
330                dvmRegisterMapGetNumEntries(pMap),
331                dvmRegisterMapGetRegWidth(pMap));
332        }
333    }
334
335    pResult = pMap;
336
337bail:
338    return pResult;
339}
340
341/*
342 * Release the storage held by a RegisterMap.
343 */
344void dvmFreeRegisterMap(RegisterMap* pMap)
345{
346    if (pMap == NULL)
347        return;
348
349    assert(dvmRegisterMapGetOnHeap(pMap));
350    free(pMap);
351}
352
353/*
354 * Determine if the RegType value is a reference type.
355 *
356 * Ordinarily we include kRegTypeZero in the "is it a reference"
357 * check.  There's no value in doing so here, because we know
358 * the register can't hold anything but zero.
359 */
360static inline bool isReferenceType(RegType type)
361{
362    return (type > kRegTypeMAX || type == kRegTypeUninit);
363}
364
365/*
366 * Given a line of registers, output a bit vector that indicates whether
367 * or not the register holds a reference type (which could be null).
368 *
369 * We use '1' to indicate it's a reference, '0' for anything else (numeric
370 * value, uninitialized data, merge conflict).  Register 0 will be found
371 * in the low bit of the first byte.
372 */
373static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data)
374{
375    u1 val = 0;
376    int i;
377
378    for (i = 0; i < insnRegCount; i++) {
379        RegType type = *regs++;
380        val >>= 1;
381        if (isReferenceType(type))
382            val |= 0x80;        /* set hi bit */
383
384        if ((i & 0x07) == 7)
385            *data++ = val;
386    }
387    if ((i & 0x07) != 0) {
388        /* flush bits from last byte */
389        val >>= 8 - (i & 0x07);
390        *data++ = val;
391    }
392}
393
394/*
395 * Print the map as a series of binary strings.
396 *
397 * Pass in method->registersSize if known, or -1 if not.
398 */
399static void dumpRegisterMap(const RegisterMap* pMap, int registersSize)
400{
401    const u1* rawMap = pMap->data;
402    const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap);
403    const int numEntries = dvmRegisterMapGetNumEntries(pMap);
404    const int regWidth = dvmRegisterMapGetRegWidth(pMap);
405    int addrWidth;
406
407    switch (format) {
408    case kRegMapFormatCompact8:
409        addrWidth = 1;
410        break;
411    case kRegMapFormatCompact16:
412        addrWidth = 2;
413        break;
414    default:
415        /* can't happen */
416        ALOGE("Can only dump Compact8 / Compact16 maps (not %d)", format);
417        return;
418    }
419
420    if (registersSize < 0)
421        registersSize = 8 * regWidth;
422    assert(registersSize <= regWidth * 8);
423
424    int ent;
425    for (ent = 0; ent < numEntries; ent++) {
426        int i, addr;
427
428        addr = *rawMap++;
429        if (addrWidth > 1)
430            addr |= (*rawMap++) << 8;
431
432        const u1* dataStart = rawMap;
433        u1 val = 0;
434
435        /* create binary string */
436        char outBuf[registersSize +1];
437        for (i = 0; i < registersSize; i++) {
438            val >>= 1;
439            if ((i & 0x07) == 0)
440                val = *rawMap++;
441
442            outBuf[i] = '0' + (val & 0x01);
443        }
444        outBuf[i] = '\0';
445
446        /* back up and create hex dump */
447        char hexBuf[regWidth * 3 +1];
448        char* cp = hexBuf;
449        rawMap = dataStart;
450        for (i = 0; i < regWidth; i++) {
451            sprintf(cp, " %02x", *rawMap++);
452            cp += 3;
453        }
454        hexBuf[i * 3] = '\0';
455
456        ALOGD("  %04x %s %s", addr, outBuf, hexBuf);
457    }
458}
459
460/*
461 * Double-check the map.
462 *
463 * We run through all of the data in the map, and compare it to the original.
464 * Only works on uncompressed data.
465 */
466static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap)
467{
468    const u1* rawMap = pMap->data;
469    const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap);
470    const int numEntries = dvmRegisterMapGetNumEntries(pMap);
471    int ent;
472    bool dumpMap = false;
473
474    if (false) {
475        const char* cd = "Landroid/net/http/Request;";
476        const char* mn = "readResponse";
477        if (strcmp(vdata->method->clazz->descriptor, cd) == 0 &&
478            strcmp(vdata->method->name, mn) == 0)
479        {
480            char* desc;
481            desc = dexProtoCopyMethodDescriptor(&vdata->method->prototype);
482            ALOGI("Map for %s.%s %s", vdata->method->clazz->descriptor,
483                vdata->method->name, desc);
484            free(desc);
485
486            dumpMap = true;
487        }
488    }
489
490    if ((vdata->method->registersSize + 7) / 8 != pMap->regWidth) {
491        ALOGE("GLITCH: registersSize=%d, regWidth=%d",
492            vdata->method->registersSize, pMap->regWidth);
493        return false;
494    }
495
496    for (ent = 0; ent < numEntries; ent++) {
497        int addr;
498
499        switch (format) {
500        case kRegMapFormatCompact8:
501            addr = *rawMap++;
502            break;
503        case kRegMapFormatCompact16:
504            addr = *rawMap++;
505            addr |= (*rawMap++) << 8;
506            break;
507        default:
508            /* shouldn't happen */
509            ALOGE("GLITCH: bad format (%d)", format);
510            dvmAbort();
511        }
512
513        const RegType* regs = vdata->registerLines[addr].regTypes;
514        if (regs == NULL) {
515            ALOGE("GLITCH: addr %d has no data", addr);
516            return false;
517        }
518
519        u1 val = 0;
520        int i;
521
522        for (i = 0; i < vdata->method->registersSize; i++) {
523            bool bitIsRef, regIsRef;
524
525            val >>= 1;
526            if ((i & 0x07) == 0) {
527                /* load next byte of data */
528                val = *rawMap++;
529            }
530
531            bitIsRef = val & 0x01;
532
533            RegType type = regs[i];
534            regIsRef = isReferenceType(type);
535
536            if (bitIsRef != regIsRef) {
537                ALOGE("GLITCH: addr %d reg %d: bit=%d reg=%d(%d)",
538                    addr, i, bitIsRef, regIsRef, type);
539                return false;
540            }
541        }
542
543        /* rawMap now points to the address field of the next entry */
544    }
545
546    if (dumpMap)
547        dumpRegisterMap(pMap, vdata->method->registersSize);
548
549    return true;
550}
551
552
553/*
554 * ===========================================================================
555 *      DEX generation & parsing
556 * ===========================================================================
557 */
558
559/*
560 * Advance "ptr" to ensure 32-bit alignment.
561 */
562static inline u1* align32(u1* ptr)
563{
564    return (u1*) (((int) ptr + 3) & ~0x03);
565}
566
567/*
568 * Compute the size, in bytes, of a register map.
569 */
570static size_t computeRegisterMapSize(const RegisterMap* pMap)
571{
572    static const int kHeaderSize = offsetof(RegisterMap, data);
573    u1 format = dvmRegisterMapGetFormat(pMap);
574    u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
575
576    assert(pMap != NULL);
577
578    switch (format) {
579    case kRegMapFormatNone:
580        return 1;
581    case kRegMapFormatCompact8:
582        return kHeaderSize + (1 + pMap->regWidth) * numEntries;
583    case kRegMapFormatCompact16:
584        return kHeaderSize + (2 + pMap->regWidth) * numEntries;
585    case kRegMapFormatDifferential:
586        {
587            /* kHeaderSize + decoded ULEB128 length */
588            const u1* ptr = pMap->data;
589            int len = readUnsignedLeb128(&ptr);
590            return len + (ptr - (u1*) pMap);
591        }
592    default:
593        ALOGE("Bad register map format %d", format);
594        dvmAbort();
595        return 0;
596    }
597}
598
599/*
600 * Output the map for a single method, if it has one.
601 *
602 * Abstract and native methods have no map.  All others are expected to
603 * have one, since we know the class verified successfully.
604 *
605 * This strips the "allocated on heap" flag from the format byte, so that
606 * direct-mapped maps are correctly identified as such.
607 */
608static bool writeMapForMethod(const Method* meth, u1** pPtr)
609{
610    if (meth->registerMap == NULL) {
611        if (!dvmIsAbstractMethod(meth) && !dvmIsNativeMethod(meth)) {
612            ALOGW("Warning: no map available for %s.%s",
613                meth->clazz->descriptor, meth->name);
614            /* weird, but keep going */
615        }
616        *(*pPtr)++ = kRegMapFormatNone;
617        return true;
618    }
619
620    /* serialize map into the buffer */
621    size_t mapSize = computeRegisterMapSize(meth->registerMap);
622    memcpy(*pPtr, meth->registerMap, mapSize);
623
624    /* strip the "on heap" flag out of the format byte, which is always first */
625    assert(**pPtr == meth->registerMap->format);
626    **pPtr &= ~(kRegMapFormatOnHeap);
627
628    *pPtr += mapSize;
629
630    return true;
631}
632
633/*
634 * Write maps for all methods in the specified class to the buffer, which
635 * can hold at most "length" bytes.  "*pPtr" will be advanced past the end
636 * of the data we write.
637 */
638static bool writeMapsAllMethods(DvmDex* pDvmDex, const ClassObject* clazz,
639    u1** pPtr, size_t length)
640{
641    RegisterMapMethodPool* pMethodPool;
642    u1* ptr = *pPtr;
643    int i, methodCount;
644
645    /* artificial limit */
646    if (clazz->virtualMethodCount + clazz->directMethodCount >= 65536) {
647        ALOGE("Too many methods in %s", clazz->descriptor);
648        return false;
649    }
650
651    pMethodPool = (RegisterMapMethodPool*) ptr;
652    ptr += offsetof(RegisterMapMethodPool, methodData);
653    methodCount = 0;
654
655    /*
656     * Run through all methods, direct then virtual.  The class loader will
657     * traverse them in the same order.  (We could split them into two
658     * distinct pieces, but there doesn't appear to be any value in doing
659     * so other than that it makes class loading slightly less fragile.)
660     *
661     * The class loader won't know about miranda methods at the point
662     * where it parses this, so we omit those.
663     *
664     * TODO: consider omitting all native/abstract definitions.  Should be
665     * safe, though we lose the ability to sanity-check against the
666     * method counts in the DEX file.
667     */
668    for (i = 0; i < clazz->directMethodCount; i++) {
669        const Method* meth = &clazz->directMethods[i];
670        if (dvmIsMirandaMethod(meth))
671            continue;
672        if (!writeMapForMethod(&clazz->directMethods[i], &ptr)) {
673            return false;
674        }
675        methodCount++;
676        //ptr = align32(ptr);
677    }
678
679    for (i = 0; i < clazz->virtualMethodCount; i++) {
680        const Method* meth = &clazz->virtualMethods[i];
681        if (dvmIsMirandaMethod(meth))
682            continue;
683        if (!writeMapForMethod(&clazz->virtualMethods[i], &ptr)) {
684            return false;
685        }
686        methodCount++;
687        //ptr = align32(ptr);
688    }
689
690    pMethodPool->methodCount = methodCount;
691
692    *pPtr = ptr;
693    return true;
694}
695
696/*
697 * Write maps for all classes to the specified buffer, which can hold at
698 * most "length" bytes.
699 *
700 * Returns the actual length used, or 0 on failure.
701 */
702static size_t writeMapsAllClasses(DvmDex* pDvmDex, u1* basePtr, size_t length)
703{
704    DexFile* pDexFile = pDvmDex->pDexFile;
705    u4 count = pDexFile->pHeader->classDefsSize;
706    RegisterMapClassPool* pClassPool;
707    u4* offsetTable;
708    u1* ptr = basePtr;
709    u4 idx;
710
711    assert(gDvm.optimizing);
712
713    pClassPool = (RegisterMapClassPool*) ptr;
714    ptr += offsetof(RegisterMapClassPool, classDataOffset);
715    offsetTable = (u4*) ptr;
716    ptr += count * sizeof(u4);
717
718    pClassPool->numClasses = count;
719
720    /*
721     * We want an entry for every class, loaded or not.
722     */
723    for (idx = 0; idx < count; idx++) {
724        const DexClassDef* pClassDef;
725        const char* classDescriptor;
726        ClassObject* clazz;
727
728        pClassDef = dexGetClassDef(pDexFile, idx);
729        classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
730
731        /*
732         * All classes have been loaded into the bootstrap class loader.
733         * If we can find it, and it was successfully pre-verified, we
734         * run through its methods and add the register maps.
735         *
736         * If it wasn't pre-verified then we know it can't have any
737         * register maps.  Classes that can't be loaded or failed
738         * verification get an empty slot in the index.
739         */
740        clazz = NULL;
741        if ((pClassDef->accessFlags & CLASS_ISPREVERIFIED) != 0)
742            clazz = dvmLookupClass(classDescriptor, NULL, false);
743
744        if (clazz != NULL) {
745            offsetTable[idx] = ptr - basePtr;
746            LOGVV("%d -> offset %d (%p-%p)",
747                idx, offsetTable[idx], ptr, basePtr);
748
749            if (!writeMapsAllMethods(pDvmDex, clazz, &ptr,
750                    length - (ptr - basePtr)))
751            {
752                return 0;
753            }
754
755            ptr = align32(ptr);
756            LOGVV("Size %s (%d+%d methods): %d", clazz->descriptor,
757                clazz->directMethodCount, clazz->virtualMethodCount,
758                (ptr - basePtr) - offsetTable[idx]);
759        } else {
760            ALOGV("%4d NOT mapadding '%s'", idx, classDescriptor);
761            assert(offsetTable[idx] == 0);
762        }
763    }
764
765    if (ptr - basePtr >= (int)length) {
766        /* a bit late */
767        ALOGE("Buffer overrun");
768        dvmAbort();
769    }
770
771    return ptr - basePtr;
772}
773
774/*
775 * Generate a register map set for all verified classes in "pDvmDex".
776 */
777RegisterMapBuilder* dvmGenerateRegisterMaps(DvmDex* pDvmDex)
778{
779    RegisterMapBuilder* pBuilder;
780
781    pBuilder = (RegisterMapBuilder*) calloc(1, sizeof(RegisterMapBuilder));
782    if (pBuilder == NULL)
783        return NULL;
784
785    /*
786     * We have a couple of options here:
787     *  (1) Compute the size of the output, and malloc a buffer.
788     *  (2) Create a "large-enough" anonymous mmap region.
789     *
790     * The nice thing about option #2 is that we don't have to traverse
791     * all of the classes and methods twice.  The risk is that we might
792     * not make the region large enough.  Since the pages aren't mapped
793     * until used we can allocate a semi-absurd amount of memory without
794     * worrying about the effect on the rest of the system.
795     *
796     * The basic encoding on the largest jar file requires about 1MB of
797     * storage.  We map out 4MB here.  (TODO: guarantee that the last
798     * page of the mapping is marked invalid, so we reliably fail if
799     * we overrun.)
800     */
801    if (sysCreatePrivateMap(4 * 1024 * 1024, &pBuilder->memMap) != 0) {
802        free(pBuilder);
803        return NULL;
804    }
805
806    /*
807     * Create the maps.
808     */
809    size_t actual = writeMapsAllClasses(pDvmDex, (u1*)pBuilder->memMap.addr,
810                                        pBuilder->memMap.length);
811    if (actual == 0) {
812        dvmFreeRegisterMapBuilder(pBuilder);
813        return NULL;
814    }
815
816    ALOGV("TOTAL size of register maps: %d", actual);
817
818    pBuilder->data = pBuilder->memMap.addr;
819    pBuilder->size = actual;
820    return pBuilder;
821}
822
823/*
824 * Free the builder.
825 */
826void dvmFreeRegisterMapBuilder(RegisterMapBuilder* pBuilder)
827{
828    if (pBuilder == NULL)
829        return;
830
831    sysReleaseShmem(&pBuilder->memMap);
832    free(pBuilder);
833}
834
835
836/*
837 * Find the data for the specified class.
838 *
839 * If there's no register map data, or none for this class, we return NULL.
840 */
841const void* dvmRegisterMapGetClassData(const DexFile* pDexFile, u4 classIdx,
842    u4* pNumMaps)
843{
844    const RegisterMapClassPool* pClassPool;
845    const RegisterMapMethodPool* pMethodPool;
846
847    pClassPool = (const RegisterMapClassPool*) pDexFile->pRegisterMapPool;
848    if (pClassPool == NULL)
849        return NULL;
850
851    if (classIdx >= pClassPool->numClasses) {
852        ALOGE("bad class index (%d vs %d)", classIdx, pClassPool->numClasses);
853        dvmAbort();
854    }
855
856    u4 classOffset = pClassPool->classDataOffset[classIdx];
857    if (classOffset == 0) {
858        ALOGV("+++ no map for classIdx=%d", classIdx);
859        return NULL;
860    }
861
862    pMethodPool =
863        (const RegisterMapMethodPool*) (((u1*) pClassPool) + classOffset);
864    if (pNumMaps != NULL)
865        *pNumMaps = pMethodPool->methodCount;
866    return pMethodPool->methodData;
867}
868
869/*
870 * This advances "*pPtr" and returns its original value.
871 */
872const RegisterMap* dvmRegisterMapGetNext(const void** pPtr)
873{
874    const RegisterMap* pMap = (const RegisterMap*) *pPtr;
875
876    *pPtr = /*align32*/(((u1*) pMap) + computeRegisterMapSize(pMap));
877    LOGVV("getNext: %p -> %p (f=%#x w=%d e=%d)",
878        pMap, *pPtr, pMap->format, pMap->regWidth,
879        dvmRegisterMapGetNumEntries(pMap));
880    return pMap;
881}
882
883
884/*
885 * ===========================================================================
886 *      Utility functions
887 * ===========================================================================
888 */
889
890/*
891 * Return the data for the specified address, or NULL if not found.
892 *
893 * The result must be released with dvmReleaseRegisterMapLine().
894 */
895const u1* dvmRegisterMapGetLine(const RegisterMap* pMap, int addr)
896{
897    int addrWidth, lineWidth;
898    u1 format = dvmRegisterMapGetFormat(pMap);
899    u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
900
901    assert(numEntries > 0);
902
903    switch (format) {
904    case kRegMapFormatNone:
905        return NULL;
906    case kRegMapFormatCompact8:
907        addrWidth = 1;
908        break;
909    case kRegMapFormatCompact16:
910        addrWidth = 2;
911        break;
912    default:
913        ALOGE("Unknown format %d", format);
914        dvmAbort();
915        return NULL;
916    }
917
918    lineWidth = addrWidth + pMap->regWidth;
919
920    /*
921     * Find the appropriate entry.  Many maps are very small, some are very
922     * large.
923     */
924    static const int kSearchThreshold = 8;
925    const u1* data = NULL;
926    int lineAddr;
927
928    if (numEntries < kSearchThreshold) {
929        int i;
930        data = pMap->data;
931        for (i = numEntries; i > 0; i--) {
932            lineAddr = data[0];
933            if (addrWidth > 1)
934                lineAddr |= data[1] << 8;
935            if (lineAddr == addr)
936                return data + addrWidth;
937
938            data += lineWidth;
939        }
940        assert(data == pMap->data + lineWidth * numEntries);
941    } else {
942        int hi, lo, mid;
943
944        lo = 0;
945        hi = numEntries -1;
946
947        while (hi >= lo) {
948            mid = (hi + lo) / 2;
949            data = pMap->data + lineWidth * mid;
950
951            lineAddr = data[0];
952            if (addrWidth > 1)
953                lineAddr |= data[1] << 8;
954
955            if (addr > lineAddr) {
956                lo = mid + 1;
957            } else if (addr < lineAddr) {
958                hi = mid - 1;
959            } else {
960                return data + addrWidth;
961            }
962        }
963    }
964
965    return NULL;
966}
967
968/*
969 * Compare two register maps.
970 *
971 * Returns 0 if they're equal, nonzero if not.
972 */
973static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2)
974{
975    size_t size1, size2;
976
977    size1 = computeRegisterMapSize(pMap1);
978    size2 = computeRegisterMapSize(pMap2);
979    if (size1 != size2) {
980        ALOGI("compareMaps: size mismatch (%zd vs %zd)", size1, size2);
981        return -1;
982    }
983
984    if (memcmp(pMap1, pMap2, size1) != 0) {
985        ALOGI("compareMaps: content mismatch");
986        return -1;
987    }
988
989    return 0;
990}
991
992
993/*
994 * Get the expanded form of the register map associated with the method.
995 *
996 * If the map is already in one of the uncompressed formats, we return
997 * immediately.  Otherwise, we expand the map and replace method's register
998 * map pointer, freeing it if it was allocated on the heap.
999 *
1000 * NOTE: this function is not synchronized; external locking is mandatory
1001 * (unless we're in the zygote, where single-threaded access is guaranteed).
1002 */
1003const RegisterMap* dvmGetExpandedRegisterMap0(Method* method)
1004{
1005    const RegisterMap* curMap = method->registerMap;
1006    RegisterMap* newMap;
1007
1008    if (curMap == NULL)
1009        return NULL;
1010
1011    /* sanity check to ensure this isn't called w/o external locking */
1012    /* (if we use this at a time other than during GC, fix/remove this test) */
1013    if (true) {
1014        if (!gDvm.zygote && dvmTryLockMutex(&gDvm.gcHeapLock) == 0) {
1015            ALOGE("GLITCH: dvmGetExpandedRegisterMap not called at GC time");
1016            dvmAbort();
1017        }
1018    }
1019
1020    RegisterMapFormat format = dvmRegisterMapGetFormat(curMap);
1021    switch (format) {
1022    case kRegMapFormatCompact8:
1023    case kRegMapFormatCompact16:
1024        if (REGISTER_MAP_VERBOSE) {
1025            if (dvmRegisterMapGetOnHeap(curMap)) {
1026                ALOGD("RegMap: already expanded: %s.%s",
1027                    method->clazz->descriptor, method->name);
1028            } else {
1029                ALOGD("RegMap: stored w/o compression: %s.%s",
1030                    method->clazz->descriptor, method->name);
1031            }
1032        }
1033        return curMap;
1034    case kRegMapFormatDifferential:
1035        newMap = uncompressMapDifferential(curMap);
1036        break;
1037    default:
1038        ALOGE("Unknown format %d in dvmGetExpandedRegisterMap", format);
1039        dvmAbort();
1040        newMap = NULL;      // make gcc happy
1041    }
1042
1043    if (newMap == NULL) {
1044        ALOGE("Map failed to uncompress (fmt=%d) %s.%s",
1045            format, method->clazz->descriptor, method->name);
1046        return NULL;
1047    }
1048
1049#ifdef REGISTER_MAP_STATS
1050    /*
1051     * Gather and display some stats.
1052     */
1053    {
1054        MapStats* pStats = (MapStats*) gDvm.registerMapStats;
1055        pStats->numExpandedMaps++;
1056        pStats->totalExpandedMapSize += computeRegisterMapSize(newMap);
1057        ALOGD("RMAP: count=%d size=%d",
1058            pStats->numExpandedMaps, pStats->totalExpandedMapSize);
1059    }
1060#endif
1061
1062    IF_ALOGV() {
1063        char* desc = dexProtoCopyMethodDescriptor(&method->prototype);
1064        ALOGV("Expanding map -> %s.%s:%s",
1065            method->clazz->descriptor, method->name, desc);
1066        free(desc);
1067    }
1068
1069    /*
1070     * Update method, and free compressed map if it was sitting on the heap.
1071     */
1072    dvmSetRegisterMap(method, newMap);
1073
1074    if (dvmRegisterMapGetOnHeap(curMap))
1075        dvmFreeRegisterMap((RegisterMap*) curMap);
1076
1077    return newMap;
1078}
1079
1080
1081/*
1082 * ===========================================================================
1083 *      Map compression
1084 * ===========================================================================
1085 */
1086
1087/*
1088Notes on map compression
1089
1090The idea is to create a compressed form that will be uncompressed before
1091use, with the output possibly saved in a cache.  This means we can use an
1092approach that is unsuited for random access if we choose.
1093
1094In the event that a map simply does not work with our compression scheme,
1095it's reasonable to store the map without compression.  In the future we
1096may want to have more than one compression scheme, and try each in turn,
1097retaining the best.  (We certainly want to keep the uncompressed form if it
1098turns out to be smaller or even slightly larger than the compressed form.)
1099
1100Each entry consists of an address and a bit vector.  Adjacent entries are
1101strongly correlated, suggesting differential encoding.
1102
1103
1104Ideally we would avoid outputting adjacent entries with identical
1105bit vectors.  However, the register values at a given address do not
1106imply anything about the set of valid registers at subsequent addresses.
1107We therefore cannot omit an entry.
1108
1109  If the thread stack has a PC at an address without a corresponding
1110  entry in the register map, we must conservatively scan the registers in
1111  that thread.  This can happen when single-stepping in the debugger,
1112  because the debugger is allowed to invoke arbitrary methods when
1113  a thread is stopped at a breakpoint.  If we can guarantee that a GC
1114  thread scan will never happen while the debugger has that thread stopped,
1115  then we can lift this restriction and simply omit entries that don't
1116  change the bit vector from its previous state.
1117
1118Each entry advances the address value by at least 1 (measured in 16-bit
1119"code units").  Looking at the bootclasspath entries, advancing by 2 units
1120is most common.  Advances by 1 unit are far less common than advances by
11212 units, but more common than 5, and things fall off rapidly.  Gaps of
1122up to 220 code units appear in some computationally intensive bits of code,
1123but are exceedingly rare.
1124
1125If we sum up the number of transitions in a couple of ranges in framework.jar:
1126  [1,4]: 188998 of 218922 gaps (86.3%)
1127  [1,7]: 211647 of 218922 gaps (96.7%)
1128Using a 3-bit delta, with one value reserved as an escape code, should
1129yield good results for the address.
1130
1131These results would change dramatically if we reduced the set of GC
1132points by e.g. removing instructions like integer divide that are only
1133present because they can throw and cause an allocation.
1134
1135We also need to include an "initial gap", because the first few instructions
1136in a method may not be GC points.
1137
1138
1139By observation, many entries simply repeat the previous bit vector, or
1140change only one or two bits.  (This is with type-precise information;
1141the rate of change of bits will be different if live-precise information
1142is factored in).
1143
1144Looking again at adjacent entries in framework.jar:
1145  0 bits changed: 63.0%
1146  1 bit changed: 32.2%
1147After that it falls off rapidly, e.g. the number of entries with 2 bits
1148changed is usually less than 1/10th of the number of entries with 1 bit
1149changed.  A solution that allows us to encode 0- or 1- bit changes
1150efficiently will do well.
1151
1152We still need to handle cases where a large number of bits change.  We
1153probably want a way to drop in a full copy of the bit vector when it's
1154smaller than the representation of multiple bit changes.
1155
1156
1157The bit-change information can be encoded as an index that tells the
1158decoder to toggle the state.  We want to encode the index in as few bits
1159as possible, but we need to allow for fairly wide vectors (e.g. we have a
1160method with 175 registers).  We can deal with this in a couple of ways:
1161(1) use an encoding that assumes few registers and has an escape code
1162for larger numbers of registers; or (2) use different encodings based
1163on how many total registers the method has.  The choice depends to some
1164extent on whether methods with large numbers of registers tend to modify
1165the first 16 regs more often than the others.
1166
1167The last N registers hold method arguments.  If the bytecode is expected
1168to be examined in a debugger, "dx" ensures that the contents of these
1169registers won't change.  Depending upon the encoding format, we may be
1170able to take advantage of this.  We still have to encode the initial
1171state, but we know we'll never have to output a bit change for the last
1172N registers.
1173
1174Considering only methods with 16 or more registers, the "target octant"
1175for register changes looks like this:
1176  [ 43.1%, 16.4%, 6.5%, 6.2%, 7.4%, 8.8%, 9.7%, 1.8% ]
1177As expected, there are fewer changes at the end of the list where the
1178arguments are kept, and more changes at the start of the list because
1179register values smaller than 16 can be used in compact Dalvik instructions
1180and hence are favored for frequently-used values.  In general, the first
1181octant is considerably more active than later entries, the last octant
1182is much less active, and the rest are all about the same.
1183
1184Looking at all bit changes in all methods, 94% are to registers 0-15.  The
1185encoding will benefit greatly by favoring the low-numbered registers.
1186
1187
1188Some of the smaller methods have identical maps, and space could be
1189saved by simply including a pointer to an earlier definition.  This would
1190be best accomplished by specifying a "pointer" format value, followed by
1191a 3-byte (or ULEB128) offset.  Implementing this would probably involve
1192generating a hash value for each register map and maintaining a hash table.
1193
1194In some cases there are repeating patterns in the bit vector that aren't
1195adjacent.  These could benefit from a dictionary encoding.  This doesn't
1196really become useful until the methods reach a certain size though,
1197and managing the dictionary may incur more overhead than we want.
1198
1199Large maps can be compressed significantly.  The trouble is that, when
1200we need to use them, we have to uncompress them onto the heap.  We may
1201get a better trade-off between storage size and heap usage by refusing to
1202compress large maps, so that they can be memory mapped and used directly.
1203(OTOH, only about 2% of the maps will ever actually be used.)
1204
1205
1206----- differential format -----
1207
1208// common header
1209+00 1B format
1210+01 1B regWidth
1211+02 2B numEntries (little-endian)
1212+04 nB length in bytes of the data that follows, in ULEB128 format
1213       (not strictly necessary; allows determination of size w/o full parse)
1214+05+ 1B initial address (0-127), high bit set if max addr >= 256
1215+06+ nB initial value for bit vector
1216
1217// for each entry
1218+00: CCCCBAAA
1219
1220  AAA: address difference.  Values from 0 to 6 indicate an increment of 1
1221  to 7.  A value of 7 indicates that the address difference is large,
1222  and the next byte is a ULEB128-encoded difference value.
1223
1224  B: determines the meaning of CCCC.
1225
1226  CCCC: if B is 0, this is the number of the bit to toggle (0-15).
1227  If B is 1, this is a count of the number of changed bits (1-14).  A value
1228  of 0 means that no bits were changed, and a value of 15 indicates
1229  that enough bits were changed that it required less space to output
1230  the entire bit vector.
1231
1232+01: (optional) ULEB128-encoded address difference
1233
1234+01+: (optional) one or more ULEB128-encoded bit numbers, OR the entire
1235  bit vector.
1236
1237The most common situation is an entry whose address has changed by 2-4
1238code units, has no changes or just a single bit change, and the changed
1239register is less than 16.  We should therefore be able to encode a large
1240number of entries with a single byte, which is half the size of the
1241Compact8 encoding method.
1242*/
1243
1244/*
1245 * Compute some stats on an uncompressed register map.
1246 */
1247#ifdef REGISTER_MAP_STATS
1248static void computeMapStats(RegisterMap* pMap, const Method* method)
1249{
1250    MapStats* pStats = (MapStats*) gDvm.registerMapStats;
1251    const u1 format = dvmRegisterMapGetFormat(pMap);
1252    const u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
1253    const u1* rawMap = pMap->data;
1254    const u1* prevData = NULL;
1255    int ent, addr, prevAddr = -1;
1256
1257    for (ent = 0; ent < numEntries; ent++) {
1258        switch (format) {
1259        case kRegMapFormatCompact8:
1260            addr = *rawMap++;
1261            break;
1262        case kRegMapFormatCompact16:
1263            addr = *rawMap++;
1264            addr |= (*rawMap++) << 8;
1265            break;
1266        default:
1267            /* shouldn't happen */
1268            ALOGE("GLITCH: bad format (%d)", format);
1269            dvmAbort();
1270        }
1271
1272        const u1* dataStart = rawMap;
1273
1274        pStats->totalGcPointCount++;
1275
1276        /*
1277         * Gather "gap size" stats, i.e. the difference in addresses between
1278         * successive GC points.
1279         */
1280        if (prevData != NULL) {
1281            assert(prevAddr >= 0);
1282            int addrDiff = addr - prevAddr;
1283
1284            if (addrDiff < 0) {
1285                ALOGE("GLITCH: address went backward (0x%04x->0x%04x, %s.%s)",
1286                    prevAddr, addr, method->clazz->descriptor, method->name);
1287            } else if (addrDiff > kMaxGcPointGap) {
1288                if (REGISTER_MAP_VERBOSE) {
1289                    ALOGI("HEY: addrDiff is %d, max %d (0x%04x->0x%04x %s.%s)",
1290                        addrDiff, kMaxGcPointGap, prevAddr, addr,
1291                        method->clazz->descriptor, method->name);
1292                }
1293                /* skip this one */
1294            } else {
1295                pStats->gcPointGap[addrDiff]++;
1296            }
1297            pStats->gcGapCount++;
1298
1299
1300            /*
1301             * Compare bit vectors in adjacent entries.  We want to count
1302             * up the number of bits that differ (to see if we frequently
1303             * change 0 or 1 bits) and get a sense for which part of the
1304             * vector changes the most often (near the start, middle, end).
1305             *
1306             * We only do the vector position quantization if we have at
1307             * least 16 registers in the method.
1308             */
1309            int numDiff = 0;
1310            float div = (float) kNumUpdatePosns / method->registersSize;
1311            int regByte;
1312            for (regByte = 0; regByte < pMap->regWidth; regByte++) {
1313                int prev, cur, bit;
1314
1315                prev = prevData[regByte];
1316                cur = dataStart[regByte];
1317
1318                for (bit = 0; bit < 8; bit++) {
1319                    if (((prev >> bit) & 1) != ((cur >> bit) & 1)) {
1320                        numDiff++;
1321
1322                        int bitNum = regByte * 8 + bit;
1323
1324                        if (bitNum < 16)
1325                            pStats->updateLT16++;
1326                        else
1327                            pStats->updateGE16++;
1328
1329                        if (method->registersSize < 16)
1330                            continue;
1331
1332                        if (bitNum >= method->registersSize) {
1333                            /* stuff off the end should be zero in both */
1334                            ALOGE("WEIRD: bit=%d (%d/%d), prev=%02x cur=%02x",
1335                                bit, regByte, method->registersSize,
1336                                prev, cur);
1337                            assert(false);
1338                        }
1339                        int idx = (int) (bitNum * div);
1340                        if (!(idx >= 0 && idx < kNumUpdatePosns)) {
1341                            ALOGE("FAIL: bitNum=%d (of %d) div=%.3f idx=%d",
1342                                bitNum, method->registersSize, div, idx);
1343                            assert(false);
1344                        }
1345                        pStats->updatePosn[idx]++;
1346                    }
1347                }
1348            }
1349
1350            if (numDiff > kMaxDiffBits) {
1351                if (REGISTER_MAP_VERBOSE) {
1352                    ALOGI("WOW: numDiff is %d, max %d", numDiff, kMaxDiffBits);
1353                }
1354            } else {
1355                pStats->numDiffBits[numDiff]++;
1356            }
1357        }
1358
1359        /* advance to start of next line */
1360        rawMap += pMap->regWidth;
1361
1362        prevAddr = addr;
1363        prevData = dataStart;
1364    }
1365}
1366#endif
1367
1368/*
1369 * Compute the difference between two bit vectors.
1370 *
1371 * If "lebOutBuf" is non-NULL, we output the bit indices in ULEB128 format
1372 * as we go.  Otherwise, we just generate the various counts.
1373 *
1374 * The bit vectors are compared byte-by-byte, so any unused bits at the
1375 * end must be zero.
1376 *
1377 * Returns the number of bytes required to hold the ULEB128 output.
1378 *
1379 * If "pFirstBitChanged" or "pNumBitsChanged" are non-NULL, they will
1380 * receive the index of the first changed bit and the number of changed
1381 * bits, respectively.
1382 */
1383static int computeBitDiff(const u1* bits1, const u1* bits2, int byteWidth,
1384    int* pFirstBitChanged, int* pNumBitsChanged, u1* lebOutBuf)
1385{
1386    int numBitsChanged = 0;
1387    int firstBitChanged = -1;
1388    int lebSize = 0;
1389    int byteNum;
1390
1391    /*
1392     * Run through the vectors, first comparing them at the byte level.  This
1393     * will yield a fairly quick result if nothing has changed between them.
1394     */
1395    for (byteNum = 0; byteNum < byteWidth; byteNum++) {
1396        u1 byte1 = *bits1++;
1397        u1 byte2 = *bits2++;
1398        if (byte1 != byte2) {
1399            /*
1400             * Walk through the byte, identifying the changed bits.
1401             */
1402            int bitNum;
1403            for (bitNum = 0; bitNum < 8; bitNum++) {
1404                if (((byte1 >> bitNum) & 0x01) != ((byte2 >> bitNum) & 0x01)) {
1405                    int bitOffset = (byteNum << 3) + bitNum;
1406
1407                    if (firstBitChanged < 0)
1408                        firstBitChanged = bitOffset;
1409                    numBitsChanged++;
1410
1411                    if (lebOutBuf == NULL) {
1412                        lebSize += unsignedLeb128Size(bitOffset);
1413                    } else {
1414                        u1* curBuf = lebOutBuf;
1415                        lebOutBuf = writeUnsignedLeb128(lebOutBuf, bitOffset);
1416                        lebSize += lebOutBuf - curBuf;
1417                    }
1418                }
1419            }
1420        }
1421    }
1422
1423    if (numBitsChanged > 0)
1424        assert(firstBitChanged >= 0);
1425
1426    if (pFirstBitChanged != NULL)
1427        *pFirstBitChanged = firstBitChanged;
1428    if (pNumBitsChanged != NULL)
1429        *pNumBitsChanged = numBitsChanged;
1430
1431    return lebSize;
1432}
1433
1434/*
1435 * Compress the register map with differential encoding.
1436 *
1437 * "meth" is only needed for debug output.
1438 *
1439 * On success, returns a newly-allocated RegisterMap.  If the map is not
1440 * compatible for some reason, or fails to get smaller, this will return NULL.
1441 */
1442static RegisterMap* compressMapDifferential(const RegisterMap* pMap,
1443    const Method* meth)
1444{
1445    RegisterMap* pNewMap = NULL;
1446    int origSize = computeRegisterMapSize(pMap);
1447    u1* tmpPtr;
1448    int addrWidth, regWidth, numEntries;
1449    bool debug = false;
1450
1451    if (false &&
1452        strcmp(meth->clazz->descriptor, "Landroid/text/StaticLayout;") == 0 &&
1453        strcmp(meth->name, "generate") == 0)
1454    {
1455        debug = true;
1456    }
1457
1458    u1 format = dvmRegisterMapGetFormat(pMap);
1459    switch (format) {
1460    case kRegMapFormatCompact8:
1461        addrWidth = 1;
1462        break;
1463    case kRegMapFormatCompact16:
1464        addrWidth = 2;
1465        break;
1466    default:
1467        ALOGE("ERROR: can't compress map with format=%d", format);
1468        return NULL;
1469    }
1470
1471    regWidth = dvmRegisterMapGetRegWidth(pMap);
1472    numEntries = dvmRegisterMapGetNumEntries(pMap);
1473
1474    if (debug) {
1475        ALOGI("COMPRESS: %s.%s aw=%d rw=%d ne=%d",
1476            meth->clazz->descriptor, meth->name,
1477            addrWidth, regWidth, numEntries);
1478        dumpRegisterMap(pMap, -1);
1479    }
1480
1481    if (numEntries <= 1) {
1482        ALOGV("Can't compress map with 0 or 1 entries");
1483        return NULL;
1484    }
1485
1486    /*
1487     * We don't know how large the compressed data will be.  It's possible
1488     * for it to expand and become larger than the original.  The header
1489     * itself is variable-sized, so we generate everything into a temporary
1490     * buffer and then copy it to form-fitting storage once we know how big
1491     * it will be (and that it's smaller than the original).
1492     *
1493     * If we use a size that is equal to the size of the input map plus
1494     * a value longer than a single entry can possibly expand to, we need
1495     * only check for overflow at the end of each entry.  The worst case
1496     * for a single line is (1 + <ULEB8 address> + <full copy of vector>).
1497     * Addresses are 16 bits, so that's (1 + 3 + regWidth).
1498     *
1499     * The initial address offset and bit vector will take up less than
1500     * or equal to the amount of space required when uncompressed -- large
1501     * initial offsets are rejected.
1502     */
1503    UniquePtr<u1[]> tmpBuf(new u1[origSize + (1 + 3 + regWidth)]);
1504    if (tmpBuf.get() == NULL)
1505        return NULL;
1506
1507    tmpPtr = tmpBuf.get();
1508
1509    const u1* mapData = pMap->data;
1510    const u1* prevBits;
1511    u2 addr, prevAddr;
1512
1513    addr = *mapData++;
1514    if (addrWidth > 1)
1515        addr |= (*mapData++) << 8;
1516
1517    if (addr >= 128) {
1518        ALOGV("Can't compress map with starting address >= 128");
1519        return NULL;
1520    }
1521
1522    /*
1523     * Start by writing the initial address and bit vector data.  The high
1524     * bit of the initial address is used to indicate the required address
1525     * width (which the decoder can't otherwise determine without parsing
1526     * the compressed data).
1527     */
1528    *tmpPtr++ = addr | (addrWidth > 1 ? 0x80 : 0x00);
1529    memcpy(tmpPtr, mapData, regWidth);
1530
1531    prevBits = mapData;
1532    prevAddr = addr;
1533
1534    tmpPtr += regWidth;
1535    mapData += regWidth;
1536
1537    /*
1538     * Loop over all following entries.
1539     */
1540    for (int entry = 1; entry < numEntries; entry++) {
1541        int addrDiff;
1542        u1 key;
1543
1544        /*
1545         * Pull out the address and figure out how to encode it.
1546         */
1547        addr = *mapData++;
1548        if (addrWidth > 1)
1549            addr |= (*mapData++) << 8;
1550
1551        if (debug)
1552            ALOGI(" addr=0x%04x ent=%d (aw=%d)", addr, entry, addrWidth);
1553
1554        addrDiff = addr - prevAddr;
1555        assert(addrDiff > 0);
1556        if (addrDiff < 8) {
1557            /* small difference, encode in 3 bits */
1558            key = addrDiff -1;          /* set 00000AAA */
1559            if (debug)
1560                ALOGI(" : small %d, key=0x%02x", addrDiff, key);
1561        } else {
1562            /* large difference, output escape code */
1563            key = 0x07;                 /* escape code for AAA */
1564            if (debug)
1565                ALOGI(" : large %d, key=0x%02x", addrDiff, key);
1566        }
1567
1568        int numBitsChanged, firstBitChanged, lebSize;
1569
1570        lebSize = computeBitDiff(prevBits, mapData, regWidth,
1571            &firstBitChanged, &numBitsChanged, NULL);
1572
1573        if (debug) {
1574            ALOGI(" : diff fbc=%d nbc=%d ls=%d (rw=%d)",
1575                firstBitChanged, numBitsChanged, lebSize, regWidth);
1576        }
1577
1578        if (numBitsChanged == 0) {
1579            /* set B to 1 and CCCC to zero to indicate no bits were changed */
1580            key |= 0x08;
1581            if (debug) ALOGI(" : no bits changed");
1582        } else if (numBitsChanged == 1 && firstBitChanged < 16) {
1583            /* set B to 0 and CCCC to the index of the changed bit */
1584            key |= firstBitChanged << 4;
1585            if (debug) ALOGI(" : 1 low bit changed");
1586        } else if (numBitsChanged < 15 && lebSize < regWidth) {
1587            /* set B to 1 and CCCC to the number of bits */
1588            key |= 0x08 | (numBitsChanged << 4);
1589            if (debug) ALOGI(" : some bits changed");
1590        } else {
1591            /* set B to 1 and CCCC to 0x0f so we store the entire vector */
1592            key |= 0x08 | 0xf0;
1593            if (debug) ALOGI(" : encode original");
1594        }
1595
1596        /*
1597         * Encode output.  Start with the key, follow with the address
1598         * diff (if it didn't fit in 3 bits), then the changed bit info.
1599         */
1600        *tmpPtr++ = key;
1601        if ((key & 0x07) == 0x07)
1602            tmpPtr = writeUnsignedLeb128(tmpPtr, addrDiff);
1603
1604        if ((key & 0x08) != 0) {
1605            int bitCount = key >> 4;
1606            if (bitCount == 0) {
1607                /* nothing changed, no additional output required */
1608            } else if (bitCount == 15) {
1609                /* full vector is most compact representation */
1610                memcpy(tmpPtr, mapData, regWidth);
1611                tmpPtr += regWidth;
1612            } else {
1613                /* write bit indices in LEB128 format */
1614                (void) computeBitDiff(prevBits, mapData, regWidth,
1615                    NULL, NULL, tmpPtr);
1616                tmpPtr += lebSize;
1617            }
1618        } else {
1619            /* single bit changed; its index is already encoded in the key byte */
1620        }
1621
1622        prevBits = mapData;
1623        prevAddr = addr;
1624        mapData += regWidth;
1625
1626        /*
1627         * See if we've run past the original size.
1628         */
1629        if (tmpPtr - tmpBuf.get() >= origSize) {
1630            if (debug) {
1631                ALOGD("Compressed size >= original (%d vs %d): %s.%s",
1632                    (int) (tmpPtr - tmpBuf.get()), origSize,
1633                    meth->clazz->descriptor, meth->name);
1634            }
1635            return NULL;
1636        }
1637    }
1638
1639    /*
1640     * Create a RegisterMap with the contents.
1641     *
1642     * TODO: consider using a threshold other than merely ">=".  We would
1643     * get poorer compression but potentially use less native heap space.
1644     */
1645    static const int kHeaderSize = offsetof(RegisterMap, data);
1646    int newDataSize = tmpPtr - tmpBuf.get();
1647    int newMapSize;
1648
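    /* compressed layout: header, then a ULEB128 data length, then the encoded entries */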
1649    newMapSize = kHeaderSize + unsignedLeb128Size(newDataSize) + newDataSize;
1650    if (newMapSize >= origSize) {
1651        if (debug) {
1652            ALOGD("Final comp size >= original (%d vs %d): %s.%s",
1653                newMapSize, origSize, meth->clazz->descriptor, meth->name);
1654        }
1655        return NULL;
1656    }
1657
1658    pNewMap = (RegisterMap*) malloc(newMapSize);
1659    if (pNewMap == NULL)
1660        return NULL;
1661    dvmRegisterMapSetFormat(pNewMap, kRegMapFormatDifferential);
1662    dvmRegisterMapSetOnHeap(pNewMap, true);
1663    dvmRegisterMapSetRegWidth(pNewMap, regWidth);
1664    dvmRegisterMapSetNumEntries(pNewMap, numEntries);
1665
1666    tmpPtr = pNewMap->data;
1667    tmpPtr = writeUnsignedLeb128(tmpPtr, newDataSize);
1668    memcpy(tmpPtr, tmpBuf.get(), newDataSize);
1669
1670    if (REGISTER_MAP_VERBOSE) {
1671        ALOGD("Compression successful (%d -> %d) from aw=%d rw=%d ne=%d",
1672            computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap),
1673            addrWidth, regWidth, numEntries);
1674    }
1675
1676    return pNewMap;
1677}
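
/*
 * Worked example (addrWidth=1, regWidth=1): two GC points, at address 0x04
 * with bits 0x03 and at address 0x06 with bits 0x07, produce the entry
 * stream 04 03 21.  The first two bytes are the initial address and vector;
 * 0x21 encodes addrDiff=2 (AAA=001) with a single changed bit at index 2
 * (B=0, CCCC=0010), so no further bytes are needed.  (A map this small
 * would not actually beat the original size, so it would be rejected, but
 * it illustrates the byte layout.)
 */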
1678
1679/*
1680 * Toggle the value of the "idx"th bit in "ptr".
1681 */
1682static inline void toggleBit(u1* ptr, int idx)
1683{
1684    ptr[idx >> 3] ^= 1 << (idx & 0x07);
1685}
1686
1687/*
1688 * Expand a compressed map to an uncompressed form.
1689 *
1690 * Returns a newly-allocated RegisterMap on success, or NULL on failure.
1691 *
1692 * TODO: consider using the linear allocator or a custom allocator with
1693 * LRU replacement for these instead of the native heap.
1694 */
1695static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap)
1696{
1697    static const int kHeaderSize = offsetof(RegisterMap, data);
1698    u1 format = dvmRegisterMapGetFormat(pMap);
1699    RegisterMapFormat newFormat;
1700    int regWidth, numEntries, newAddrWidth, newMapSize;
1701
1702    if (format != kRegMapFormatDifferential) {
1703        ALOGE("Not differential (%d)", format);
1704        return NULL;
1705    }
1706
1707    regWidth = dvmRegisterMapGetRegWidth(pMap);
1708    numEntries = dvmRegisterMapGetNumEntries(pMap);
1709
1710    /* get the data size; we can check this at the end */
1711    const u1* srcPtr = pMap->data;
1712    int expectedSrcLen = readUnsignedLeb128(&srcPtr);
1713    const u1* srcStart = srcPtr;
1714
1715    /* get the initial address and the 16-bit address flag */
1716    int addr = *srcPtr & 0x7f;
1717    if ((*srcPtr & 0x80) == 0) {
1718        newFormat = kRegMapFormatCompact8;
1719        newAddrWidth = 1;
1720    } else {
1721        newFormat = kRegMapFormatCompact16;
1722        newAddrWidth = 2;
1723    }
1724    srcPtr++;
1725
1726    /* now we know enough to allocate the new map */
1727    if (REGISTER_MAP_VERBOSE) {
1728        ALOGI("Expanding to map aw=%d rw=%d ne=%d",
1729            newAddrWidth, regWidth, numEntries);
1730    }
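    /* expanded layout: one (address, bit vector) pair per GC point, following the header */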
1731    newMapSize = kHeaderSize + (newAddrWidth + regWidth) * numEntries;
1732    RegisterMap* pNewMap = (RegisterMap*) malloc(newMapSize);
1733
1734    if (pNewMap == NULL)
1735        return NULL;
1736
1737    dvmRegisterMapSetFormat(pNewMap, newFormat);
1738    dvmRegisterMapSetOnHeap(pNewMap, true);
1739    dvmRegisterMapSetRegWidth(pNewMap, regWidth);
1740    dvmRegisterMapSetNumEntries(pNewMap, numEntries);
1741
1742    /*
1743     * Write the start address and initial bits to the new map.
1744     */
1745    u1* dstPtr = pNewMap->data;
1746
1747    *dstPtr++ = addr & 0xff;
1748    if (newAddrWidth > 1)
1749        *dstPtr++ = (u1) (addr >> 8);
1750
1751    memcpy(dstPtr, srcPtr, regWidth);
1752
1753    int prevAddr = addr;
1754    const u1* prevBits = dstPtr;    /* point at uncompressed data */
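    /*
     * Note that prevBits points into the expanded output, so each delta
     * below is applied to a fully reconstructed vector rather than to the
     * compressed input.
     */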
1755
1756    dstPtr += regWidth;
1757    srcPtr += regWidth;
1758
1759    /*
1760     * Walk through, uncompressing one entry at a time.
1761     */
1762    int entry;
1763    for (entry = 1; entry < numEntries; entry++) {
1764        int addrDiff;
1765        u1 key;
1766
1767        key = *srcPtr++;
1768
1769        /* get the address */
1770        if ((key & 0x07) == 7) {
1771            /* address diff follows in ULEB128 */
1772            addrDiff = readUnsignedLeb128(&srcPtr);
1773        } else {
1774            addrDiff = (key & 0x07) + 1;
1775        }
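        /* addrDiff is always at least 1 (the compressor asserts addrDiff > 0) */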
1776
1777        addr = prevAddr + addrDiff;
1778        *dstPtr++ = addr & 0xff;
1779        if (newAddrWidth > 1)
1780            *dstPtr++ = (u1) (addr >> 8);
1781
1782        /* unpack the bits */
1783        if ((key & 0x08) != 0) {
1784            int bitCount = (key >> 4);
1785            if (bitCount == 0) {
1786                /* no bits changed, just copy previous */
1787                memcpy(dstPtr, prevBits, regWidth);
1788            } else if (bitCount == 15) {
1789                /* full copy of bit vector is present; ignore prevBits */
1790                memcpy(dstPtr, srcPtr, regWidth);
1791                srcPtr += regWidth;
1792            } else {
1793                /* copy previous bits and modify listed indices */
1794                memcpy(dstPtr, prevBits, regWidth);
1795                while (bitCount--) {
1796                    int bitIndex = readUnsignedLeb128(&srcPtr);
1797                    toggleBit(dstPtr, bitIndex);
1798                }
1799            }
1800        } else {
1801            /* copy previous bits and modify the specified one */
1802            memcpy(dstPtr, prevBits, regWidth);
1803
1804            /* one bit, from 0-15 inclusive, was changed */
1805            toggleBit(dstPtr, key >> 4);
1806        }
1807
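        /* this entry's expanded vector becomes the reference for the next delta */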
1808        prevAddr = addr;
1809        prevBits = dstPtr;
1810        dstPtr += regWidth;
1811    }
1812
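    /*
     * Sanity checks: the expanded data must fill the allocated map exactly,
     * and we must have consumed exactly the compressed length recorded in
     * the header.
     */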
1813    if (dstPtr - (u1*) pNewMap != newMapSize) {
1814        ALOGE("ERROR: output %d bytes, expected %d",
1815            (int) (dstPtr - (u1*) pNewMap), newMapSize);
1816        free(pNewMap);
1817        return NULL;
1818    }
1819
1820    if (srcPtr - srcStart != expectedSrcLen) {
1821        ALOGE("ERROR: consumed %d bytes, expected %d",
1822            (int) (srcPtr - srcStart), expectedSrcLen);
1823        free(pNewMap);
1824        return NULL;
1825    }
1826
1827    if (REGISTER_MAP_VERBOSE) {
1828        ALOGD("Expansion successful (%d -> %d)",
1829            computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap));
1830    }
1831
1832    return pNewMap;
1833}
1834
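/*
 * Illustrative sketch only (hypothetical helper, not part of the build):
 * one way the two halves above could be exercised together.  It assumes
 * "pMap" is a valid uncompressed Compact8/Compact16 map and that
 * compareMaps() follows a memcmp-like convention (0 means the maps match).
 */
#if 0
static bool roundTripSketch(const RegisterMap* pMap, const Method* meth)
{
    RegisterMap* pComp = compressMapDifferential(pMap, meth);
    if (pComp == NULL)
        return true;        /* compression declined; nothing to check */

    RegisterMap* pExp = uncompressMapDifferential(pComp);
    bool match = (pExp != NULL) && (compareMaps(pMap, pExp) == 0);

    free(pExp);             /* free(NULL) is harmless if expansion failed */
    free(pComp);
    return match;
}
#endif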