1/*
2 * Copyright (C) 2009 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * This code generate "register maps" for Dalvik bytecode.  In a stack-based
19 * VM we might call these "stack maps".  They are used to increase the
20 * precision in the garbage collector when scanning references in the
21 * interpreter thread stacks.
22 */
23#include "Dalvik.h"
24#include "UniquePtr.h"
25#include "analysis/CodeVerify.h"
26#include "analysis/RegisterMap.h"
27#include "libdex/DexCatch.h"
28#include "libdex/InstrUtils.h"
29#include "libdex/Leb128.h"
30
31#include <stddef.h>
32
33/* double-check the compression */
34#define REGISTER_MAP_VERIFY     false
35
36/* verbose logging */
37#define REGISTER_MAP_VERBOSE    false
38
39//#define REGISTER_MAP_STATS
40
41// fwd
42static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data);
43static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap);
44static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2);
45
46#ifdef REGISTER_MAP_STATS
47static void computeMapStats(RegisterMap* pMap, const Method* method);
48#endif
49static RegisterMap* compressMapDifferential(const RegisterMap* pMap,\
50    const Method* meth);
51static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap);
52
53#ifdef REGISTER_MAP_STATS
54/*
55 * Generate some statistics on the register maps we create and use.
56 */
57#define kMaxGcPointGap      50
58#define kUpdatePosnMinRegs  24
59#define kNumUpdatePosns     8
60#define kMaxDiffBits        20
61struct MapStats {
62    /*
63     * Buckets measuring the distance between GC points.  This tells us how
64     * many bits we need to encode the advancing program counter.  We ignore
65     * some of the "long tail" entries.
66     */
67    int gcPointGap[kMaxGcPointGap];
68
69    /*
70     * Number of gaps.  Equal to (number of gcPoints - number of methods),
71     * since the computation isn't including the initial gap.
72     */
73    int gcGapCount;
74
75    /*
76     * Number of gaps.
77     */
78    int totalGcPointCount;
79
80    /*
81     * For larger methods (>= 24 registers), measure in which octant register
82     * updates occur.  This should help us understand whether register
83     * changes tend to cluster in the low regs even for large methods.
84     */
85    int updatePosn[kNumUpdatePosns];
86
87    /*
88     * For all methods, count up the number of changes to registers < 16
89     * and >= 16.
90     */
91    int updateLT16;
92    int updateGE16;
93
94    /*
95     * Histogram of the number of bits that differ between adjacent entries.
96     */
97    int numDiffBits[kMaxDiffBits];
98
99
100    /*
101     * Track the number of expanded maps, and the heap space required to
102     * hold them.
103     */
104    int numExpandedMaps;
105    int totalExpandedMapSize;
106};
107#endif
108
109/*
110 * Prepare some things.
111 */
112bool dvmRegisterMapStartup()
113{
114#ifdef REGISTER_MAP_STATS
115    MapStats* pStats = calloc(1, sizeof(MapStats));
116    gDvm.registerMapStats = pStats;
117#endif
118    return true;
119}
120
121/*
122 * Clean up.
123 */
124void dvmRegisterMapShutdown()
125{
126#ifdef REGISTER_MAP_STATS
127    free(gDvm.registerMapStats);
128#endif
129}
130
131/*
132 * Write stats to log file.
133 */
134void dvmRegisterMapDumpStats()
135{
136#ifdef REGISTER_MAP_STATS
137    MapStats* pStats = (MapStats*) gDvm.registerMapStats;
138    int i, end;
139
140    for (end = kMaxGcPointGap-1; end >= 0; end--) {
141        if (pStats->gcPointGap[end] != 0)
142            break;
143    }
144
145    ALOGI("Register Map gcPointGap stats (diff count=%d, total=%d):",
146        pStats->gcGapCount, pStats->totalGcPointCount);
147    assert(pStats->gcPointGap[0] == 0);
148    for (i = 1; i <= end; i++) {
149        ALOGI(" %2d %d", i, pStats->gcPointGap[i]);
150    }
151
152
153    for (end = kMaxDiffBits-1; end >= 0; end--) {
154        if (pStats->numDiffBits[end] != 0)
155            break;
156    }
157
158    ALOGI("Register Map bit difference stats:");
159    for (i = 0; i <= end; i++) {
160        ALOGI(" %2d %d", i, pStats->numDiffBits[i]);
161    }
162
163
164    ALOGI("Register Map update position stats (lt16=%d ge16=%d):",
165        pStats->updateLT16, pStats->updateGE16);
166    for (i = 0; i < kNumUpdatePosns; i++) {
167        ALOGI(" %2d %d", i, pStats->updatePosn[i]);
168    }
169#endif
170}
171
172
173/*
174 * ===========================================================================
175 *      Map generation
176 * ===========================================================================
177 */
178
179/*
180 * Generate the register map for a method that has just been verified
181 * (i.e. we're doing this as part of verification).
182 *
183 * For type-precise determination we have all the data we need, so we
184 * just need to encode it in some clever fashion.
185 *
186 * Returns a pointer to a newly-allocated RegisterMap, or NULL on failure.
187 */
188RegisterMap* dvmGenerateRegisterMapV(VerifierData* vdata)
189{
190    static const int kHeaderSize = offsetof(RegisterMap, data);
191    RegisterMap* pMap = NULL;
192    RegisterMap* pResult = NULL;
193    RegisterMapFormat format;
194    u1 regWidth;
195    u1* mapData;
196    int i, bytesForAddr, gcPointCount;
197    int bufSize;
198
199    if (vdata->method->registersSize >= 2048) {
200        ALOGE("ERROR: register map can't handle %d registers",
201            vdata->method->registersSize);
202        goto bail;
203    }
204    regWidth = (vdata->method->registersSize + 7) / 8;
205
206    /*
207     * Decide if we need 8 or 16 bits to hold the address.  Strictly speaking
208     * we only need 16 bits if we actually encode an address >= 256 -- if
209     * the method has a section at the end without GC points (e.g. array
210     * data) we don't need to count it.  The situation is unusual, and
211     * detecting it requires scanning the entire method, so we don't bother.
212     */
213    if (vdata->insnsSize < 256) {
214        format = kRegMapFormatCompact8;
215        bytesForAddr = 1;
216    } else {
217        format = kRegMapFormatCompact16;
218        bytesForAddr = 2;
219    }
220
221    /*
222     * Count up the number of GC point instructions.
223     *
224     * NOTE: this does not automatically include the first instruction,
225     * since we don't count method entry as a GC point.
226     */
227    gcPointCount = 0;
228    for (i = 0; i < (int) vdata->insnsSize; i++) {
229        if (dvmInsnIsGcPoint(vdata->insnFlags, i))
230            gcPointCount++;
231    }
232    if (gcPointCount >= 65536) {
233        /* we could handle this, but in practice we don't get near this */
234        ALOGE("ERROR: register map can't handle %d gc points in one method",
235            gcPointCount);
236        goto bail;
237    }
238
239    /*
240     * Allocate a buffer to hold the map data.
241     */
242    bufSize = kHeaderSize + gcPointCount * (bytesForAddr + regWidth);
243
244    ALOGV("+++ grm: %s.%s (adr=%d gpc=%d rwd=%d bsz=%d)",
245        vdata->method->clazz->descriptor, vdata->method->name,
246        bytesForAddr, gcPointCount, regWidth, bufSize);
247
248    pMap = (RegisterMap*) malloc(bufSize);
249    dvmRegisterMapSetFormat(pMap, format);
250    dvmRegisterMapSetOnHeap(pMap, true);
251    dvmRegisterMapSetRegWidth(pMap, regWidth);
252    dvmRegisterMapSetNumEntries(pMap, gcPointCount);
253
254    /*
255     * Populate it.
256     */
257    mapData = pMap->data;
258    for (i = 0; i < (int) vdata->insnsSize; i++) {
259        if (dvmInsnIsGcPoint(vdata->insnFlags, i)) {
260            assert(vdata->registerLines[i].regTypes != NULL);
261            if (format == kRegMapFormatCompact8) {
262                *mapData++ = i;
263            } else /*kRegMapFormatCompact16*/ {
264                *mapData++ = i & 0xff;
265                *mapData++ = i >> 8;
266            }
267            outputTypeVector(vdata->registerLines[i].regTypes,
268                vdata->insnRegCount, mapData);
269            mapData += regWidth;
270        }
271    }
272
273    ALOGV("mapData=%p pMap=%p bufSize=%d", mapData, pMap, bufSize);
274    assert(mapData - (const u1*) pMap == bufSize);
275
276    if (REGISTER_MAP_VERIFY && !verifyMap(vdata, pMap))
277        goto bail;
278#ifdef REGISTER_MAP_STATS
279    computeMapStats(pMap, vdata->method);
280#endif
281
282    /*
283     * Try to compress the map.
284     */
285    RegisterMap* pCompMap;
286
287    pCompMap = compressMapDifferential(pMap, vdata->method);
288    if (pCompMap != NULL) {
289        if (REGISTER_MAP_VERIFY) {
290            /*
291             * Expand the compressed map we just created, and compare it
292             * to the original.  Abort the VM if it doesn't match up.
293             */
294            RegisterMap* pUncompMap;
295            pUncompMap = uncompressMapDifferential(pCompMap);
296            if (pUncompMap == NULL) {
297                ALOGE("Map failed to uncompress - %s.%s",
298                    vdata->method->clazz->descriptor,
299                    vdata->method->name);
300                free(pCompMap);
301                /* bad - compression is broken or we're out of memory */
302                dvmAbort();
303            } else {
304                if (compareMaps(pMap, pUncompMap) != 0) {
305                    ALOGE("Map comparison failed - %s.%s",
306                        vdata->method->clazz->descriptor,
307                        vdata->method->name);
308                    free(pCompMap);
309                    /* bad - compression is broken */
310                    dvmAbort();
311                }
312
313                /* verify succeeded */
314                delete pUncompMap;
315            }
316        }
317
318        if (REGISTER_MAP_VERBOSE) {
319            ALOGD("Good compress on %s.%s",
320                vdata->method->clazz->descriptor,
321                vdata->method->name);
322        }
323        free(pMap);
324        pMap = pCompMap;
325    } else {
326        if (REGISTER_MAP_VERBOSE) {
327            ALOGD("Unable to compress %s.%s (ent=%d rw=%d)",
328                vdata->method->clazz->descriptor,
329                vdata->method->name,
330                dvmRegisterMapGetNumEntries(pMap),
331                dvmRegisterMapGetRegWidth(pMap));
332        }
333    }
334
335    pResult = pMap;
336
337bail:
338    return pResult;
339}
340
341/*
342 * Release the storage held by a RegisterMap.
343 */
344void dvmFreeRegisterMap(RegisterMap* pMap)
345{
346    if (pMap == NULL)
347        return;
348
349    assert(dvmRegisterMapGetOnHeap(pMap));
350    free(pMap);
351}
352
353/*
354 * Determine if the RegType value is a reference type.
355 *
356 * Ordinarily we include kRegTypeZero in the "is it a reference"
357 * check.  There's no value in doing so here, because we know
358 * the register can't hold anything but zero.
359 */
360static inline bool isReferenceType(RegType type)
361{
362    return (type > kRegTypeMAX || type == kRegTypeUninit);
363}
364
365/*
366 * Given a line of registers, output a bit vector that indicates whether
367 * or not the register holds a reference type (which could be null).
368 *
369 * We use '1' to indicate it's a reference, '0' for anything else (numeric
370 * value, uninitialized data, merge conflict).  Register 0 will be found
371 * in the low bit of the first byte.
372 */
373static void outputTypeVector(const RegType* regs, int insnRegCount, u1* data)
374{
375    u1 val = 0;
376    int i;
377
378    for (i = 0; i < insnRegCount; i++) {
379        RegType type = *regs++;
380        val >>= 1;
381        if (isReferenceType(type))
382            val |= 0x80;        /* set hi bit */
383
384        if ((i & 0x07) == 7)
385            *data++ = val;
386    }
387    if ((i & 0x07) != 0) {
388        /* flush bits from last byte */
389        val >>= 8 - (i & 0x07);
390        *data++ = val;
391    }
392}
393
394/*
395 * Print the map as a series of binary strings.
396 *
397 * Pass in method->registersSize if known, or -1 if not.
398 */
399static void dumpRegisterMap(const RegisterMap* pMap, int registersSize)
400{
401    const u1* rawMap = pMap->data;
402    const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap);
403    const int numEntries = dvmRegisterMapGetNumEntries(pMap);
404    const int regWidth = dvmRegisterMapGetRegWidth(pMap);
405    int addrWidth;
406
407    switch (format) {
408    case kRegMapFormatCompact8:
409        addrWidth = 1;
410        break;
411    case kRegMapFormatCompact16:
412        addrWidth = 2;
413        break;
414    default:
415        /* can't happen */
416        ALOGE("Can only dump Compact8 / Compact16 maps (not %d)", format);
417        return;
418    }
419
420    if (registersSize < 0)
421        registersSize = 8 * regWidth;
422    assert(registersSize <= regWidth * 8);
423
424    int ent;
425    for (ent = 0; ent < numEntries; ent++) {
426        int i, addr;
427
428        addr = *rawMap++;
429        if (addrWidth > 1)
430            addr |= (*rawMap++) << 8;
431
432        const u1* dataStart = rawMap;
433        u1 val = 0;
434
435        /* create binary string */
436        char outBuf[registersSize +1];
437        for (i = 0; i < registersSize; i++) {
438            val >>= 1;
439            if ((i & 0x07) == 0)
440                val = *rawMap++;
441
442            outBuf[i] = '0' + (val & 0x01);
443        }
444        outBuf[i] = '\0';
445
446        /* back up and create hex dump */
447        char hexBuf[regWidth * 3 +1];
448        char* cp = hexBuf;
449        rawMap = dataStart;
450        for (i = 0; i < regWidth; i++) {
451            sprintf(cp, " %02x", *rawMap++);
452            cp += 3;
453        }
454        hexBuf[i * 3] = '\0';
455
456        ALOGD("  %04x %s %s", addr, outBuf, hexBuf);
457    }
458}
459
460/*
461 * Double-check the map.
462 *
463 * We run through all of the data in the map, and compare it to the original.
464 * Only works on uncompressed data.
465 */
466static bool verifyMap(VerifierData* vdata, const RegisterMap* pMap)
467{
468    const u1* rawMap = pMap->data;
469    const RegisterMapFormat format = dvmRegisterMapGetFormat(pMap);
470    const int numEntries = dvmRegisterMapGetNumEntries(pMap);
471    int ent;
472    bool dumpMap = false;
473
474    if (false) {
475        const char* cd = "Landroid/net/http/Request;";
476        const char* mn = "readResponse";
477        if (strcmp(vdata->method->clazz->descriptor, cd) == 0 &&
478            strcmp(vdata->method->name, mn) == 0)
479        {
480            char* desc;
481            desc = dexProtoCopyMethodDescriptor(&vdata->method->prototype);
482            ALOGI("Map for %s.%s %s", vdata->method->clazz->descriptor,
483                vdata->method->name, desc);
484            free(desc);
485
486            dumpMap = true;
487        }
488    }
489
490    if ((vdata->method->registersSize + 7) / 8 != pMap->regWidth) {
491        ALOGE("GLITCH: registersSize=%d, regWidth=%d",
492            vdata->method->registersSize, pMap->regWidth);
493        return false;
494    }
495
496    for (ent = 0; ent < numEntries; ent++) {
497        int addr;
498
499        switch (format) {
500        case kRegMapFormatCompact8:
501            addr = *rawMap++;
502            break;
503        case kRegMapFormatCompact16:
504            addr = *rawMap++;
505            addr |= (*rawMap++) << 8;
506            break;
507        default:
508            /* shouldn't happen */
509            ALOGE("GLITCH: bad format (%d)", format);
510            dvmAbort();
511            /* Make compiler happy */
512            addr = 0;
513        }
514
515        const RegType* regs = vdata->registerLines[addr].regTypes;
516        if (regs == NULL) {
517            ALOGE("GLITCH: addr %d has no data", addr);
518            return false;
519        }
520
521        u1 val = 0;
522        int i;
523
524        for (i = 0; i < vdata->method->registersSize; i++) {
525            bool bitIsRef, regIsRef;
526
527            val >>= 1;
528            if ((i & 0x07) == 0) {
529                /* load next byte of data */
530                val = *rawMap++;
531            }
532
533            bitIsRef = val & 0x01;
534
535            RegType type = regs[i];
536            regIsRef = isReferenceType(type);
537
538            if (bitIsRef != regIsRef) {
539                ALOGE("GLITCH: addr %d reg %d: bit=%d reg=%d(%d)",
540                    addr, i, bitIsRef, regIsRef, type);
541                return false;
542            }
543        }
544
545        /* rawMap now points to the address field of the next entry */
546    }
547
548    if (dumpMap)
549        dumpRegisterMap(pMap, vdata->method->registersSize);
550
551    return true;
552}
553
554
555/*
556 * ===========================================================================
557 *      DEX generation & parsing
558 * ===========================================================================
559 */
560
561/*
562 * Advance "ptr" to ensure 32-bit alignment.
563 */
564static inline u1* align32(u1* ptr)
565{
566    return (u1*) (((int) ptr + 3) & ~0x03);
567}
568
569/*
570 * Compute the size, in bytes, of a register map.
571 */
572static size_t computeRegisterMapSize(const RegisterMap* pMap)
573{
574    static const int kHeaderSize = offsetof(RegisterMap, data);
575    u1 format = dvmRegisterMapGetFormat(pMap);
576    u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
577
578    assert(pMap != NULL);
579
580    switch (format) {
581    case kRegMapFormatNone:
582        return 1;
583    case kRegMapFormatCompact8:
584        return kHeaderSize + (1 + pMap->regWidth) * numEntries;
585    case kRegMapFormatCompact16:
586        return kHeaderSize + (2 + pMap->regWidth) * numEntries;
587    case kRegMapFormatDifferential:
588        {
589            /* kHeaderSize + decoded ULEB128 length */
590            const u1* ptr = pMap->data;
591            int len = readUnsignedLeb128(&ptr);
592            return len + (ptr - (u1*) pMap);
593        }
594    default:
595        ALOGE("Bad register map format %d", format);
596        dvmAbort();
597        return 0;
598    }
599}
600
601/*
602 * Output the map for a single method, if it has one.
603 *
604 * Abstract and native methods have no map.  All others are expected to
605 * have one, since we know the class verified successfully.
606 *
607 * This strips the "allocated on heap" flag from the format byte, so that
608 * direct-mapped maps are correctly identified as such.
609 */
610static bool writeMapForMethod(const Method* meth, u1** pPtr)
611{
612    if (meth->registerMap == NULL) {
613        if (!dvmIsAbstractMethod(meth) && !dvmIsNativeMethod(meth)) {
614            ALOGW("Warning: no map available for %s.%s",
615                meth->clazz->descriptor, meth->name);
616            /* weird, but keep going */
617        }
618        *(*pPtr)++ = kRegMapFormatNone;
619        return true;
620    }
621
622    /* serialize map into the buffer */
623    size_t mapSize = computeRegisterMapSize(meth->registerMap);
624    memcpy(*pPtr, meth->registerMap, mapSize);
625
626    /* strip the "on heap" flag out of the format byte, which is always first */
627    assert(**pPtr == meth->registerMap->format);
628    **pPtr &= ~(kRegMapFormatOnHeap);
629
630    *pPtr += mapSize;
631
632    return true;
633}
634
635/*
636 * Write maps for all methods in the specified class to the buffer, which
637 * can hold at most "length" bytes.  "*pPtr" will be advanced past the end
638 * of the data we write.
639 */
640static bool writeMapsAllMethods(DvmDex* pDvmDex, const ClassObject* clazz,
641    u1** pPtr, size_t length)
642{
643    RegisterMapMethodPool* pMethodPool;
644    u1* ptr = *pPtr;
645    int i, methodCount;
646
647    /* artificial limit */
648    if (clazz->virtualMethodCount + clazz->directMethodCount >= 65536) {
649        ALOGE("Too many methods in %s", clazz->descriptor);
650        return false;
651    }
652
653    pMethodPool = (RegisterMapMethodPool*) ptr;
654    ptr += offsetof(RegisterMapMethodPool, methodData);
655    methodCount = 0;
656
657    /*
658     * Run through all methods, direct then virtual.  The class loader will
659     * traverse them in the same order.  (We could split them into two
660     * distinct pieces, but there doesn't appear to be any value in doing
661     * so other than that it makes class loading slightly less fragile.)
662     *
663     * The class loader won't know about miranda methods at the point
664     * where it parses this, so we omit those.
665     *
666     * TODO: consider omitting all native/abstract definitions.  Should be
667     * safe, though we lose the ability to sanity-check against the
668     * method counts in the DEX file.
669     */
670    for (i = 0; i < clazz->directMethodCount; i++) {
671        const Method* meth = &clazz->directMethods[i];
672        if (dvmIsMirandaMethod(meth))
673            continue;
674        if (!writeMapForMethod(&clazz->directMethods[i], &ptr)) {
675            return false;
676        }
677        methodCount++;
678        //ptr = align32(ptr);
679    }
680
681    for (i = 0; i < clazz->virtualMethodCount; i++) {
682        const Method* meth = &clazz->virtualMethods[i];
683        if (dvmIsMirandaMethod(meth))
684            continue;
685        if (!writeMapForMethod(&clazz->virtualMethods[i], &ptr)) {
686            return false;
687        }
688        methodCount++;
689        //ptr = align32(ptr);
690    }
691
692    pMethodPool->methodCount = methodCount;
693
694    *pPtr = ptr;
695    return true;
696}
697
698/*
699 * Write maps for all classes to the specified buffer, which can hold at
700 * most "length" bytes.
701 *
702 * Returns the actual length used, or 0 on failure.
703 */
704static size_t writeMapsAllClasses(DvmDex* pDvmDex, u1* basePtr, size_t length)
705{
706    DexFile* pDexFile = pDvmDex->pDexFile;
707    u4 count = pDexFile->pHeader->classDefsSize;
708    RegisterMapClassPool* pClassPool;
709    u4* offsetTable;
710    u1* ptr = basePtr;
711    u4 idx;
712
713    assert(gDvm.optimizing);
714
715    pClassPool = (RegisterMapClassPool*) ptr;
716    ptr += offsetof(RegisterMapClassPool, classDataOffset);
717    offsetTable = (u4*) ptr;
718    ptr += count * sizeof(u4);
719
720    pClassPool->numClasses = count;
721
722    /*
723     * We want an entry for every class, loaded or not.
724     */
725    for (idx = 0; idx < count; idx++) {
726        const DexClassDef* pClassDef;
727        const char* classDescriptor;
728        ClassObject* clazz;
729
730        pClassDef = dexGetClassDef(pDexFile, idx);
731        classDescriptor = dexStringByTypeIdx(pDexFile, pClassDef->classIdx);
732
733        /*
734         * All classes have been loaded into the bootstrap class loader.
735         * If we can find it, and it was successfully pre-verified, we
736         * run through its methods and add the register maps.
737         *
738         * If it wasn't pre-verified then we know it can't have any
739         * register maps.  Classes that can't be loaded or failed
740         * verification get an empty slot in the index.
741         */
742        clazz = NULL;
743        if ((pClassDef->accessFlags & CLASS_ISPREVERIFIED) != 0)
744            clazz = dvmLookupClass(classDescriptor, NULL, false);
745
746        if (clazz != NULL) {
747            offsetTable[idx] = ptr - basePtr;
748            LOGVV("%d -> offset %d (%p-%p)",
749                idx, offsetTable[idx], ptr, basePtr);
750
751            if (!writeMapsAllMethods(pDvmDex, clazz, &ptr,
752                    length - (ptr - basePtr)))
753            {
754                return 0;
755            }
756
757            ptr = align32(ptr);
758            LOGVV("Size %s (%d+%d methods): %d", clazz->descriptor,
759                clazz->directMethodCount, clazz->virtualMethodCount,
760                (ptr - basePtr) - offsetTable[idx]);
761        } else {
762            ALOGV("%4d NOT mapadding '%s'", idx, classDescriptor);
763            assert(offsetTable[idx] == 0);
764        }
765    }
766
767    if (ptr - basePtr >= (int)length) {
768        /* a bit late */
769        ALOGE("Buffer overrun");
770        dvmAbort();
771    }
772
773    return ptr - basePtr;
774}
775
776/*
777 * Generate a register map set for all verified classes in "pDvmDex".
778 */
779RegisterMapBuilder* dvmGenerateRegisterMaps(DvmDex* pDvmDex)
780{
781    RegisterMapBuilder* pBuilder;
782
783    pBuilder = (RegisterMapBuilder*) calloc(1, sizeof(RegisterMapBuilder));
784    if (pBuilder == NULL)
785        return NULL;
786
787    /*
788     * We have a couple of options here:
789     *  (1) Compute the size of the output, and malloc a buffer.
790     *  (2) Create a "large-enough" anonymous mmap region.
791     *
792     * The nice thing about option #2 is that we don't have to traverse
793     * all of the classes and methods twice.  The risk is that we might
794     * not make the region large enough.  Since the pages aren't mapped
795     * until used we can allocate a semi-absurd amount of memory without
796     * worrying about the effect on the rest of the system.
797     *
798     * The basic encoding on the largest jar file requires about 1MB of
799     * storage.  We map out 4MB here.  (TODO: guarantee that the last
800     * page of the mapping is marked invalid, so we reliably fail if
801     * we overrun.)
802     */
803    if (sysCreatePrivateMap(4 * 1024 * 1024, &pBuilder->memMap) != 0) {
804        free(pBuilder);
805        return NULL;
806    }
807
808    /*
809     * Create the maps.
810     */
811    size_t actual = writeMapsAllClasses(pDvmDex, (u1*)pBuilder->memMap.addr,
812                                        pBuilder->memMap.length);
813    if (actual == 0) {
814        dvmFreeRegisterMapBuilder(pBuilder);
815        return NULL;
816    }
817
818    ALOGV("TOTAL size of register maps: %d", actual);
819
820    pBuilder->data = pBuilder->memMap.addr;
821    pBuilder->size = actual;
822    return pBuilder;
823}
824
825/*
826 * Free the builder.
827 */
828void dvmFreeRegisterMapBuilder(RegisterMapBuilder* pBuilder)
829{
830    if (pBuilder == NULL)
831        return;
832
833    sysReleaseShmem(&pBuilder->memMap);
834    free(pBuilder);
835}
836
837
838/*
839 * Find the data for the specified class.
840 *
841 * If there's no register map data, or none for this class, we return NULL.
842 */
843const void* dvmRegisterMapGetClassData(const DexFile* pDexFile, u4 classIdx,
844    u4* pNumMaps)
845{
846    const RegisterMapClassPool* pClassPool;
847    const RegisterMapMethodPool* pMethodPool;
848
849    pClassPool = (const RegisterMapClassPool*) pDexFile->pRegisterMapPool;
850    if (pClassPool == NULL)
851        return NULL;
852
853    if (classIdx >= pClassPool->numClasses) {
854        ALOGE("bad class index (%d vs %d)", classIdx, pClassPool->numClasses);
855        dvmAbort();
856    }
857
858    u4 classOffset = pClassPool->classDataOffset[classIdx];
859    if (classOffset == 0) {
860        ALOGV("+++ no map for classIdx=%d", classIdx);
861        return NULL;
862    }
863
864    pMethodPool =
865        (const RegisterMapMethodPool*) (((u1*) pClassPool) + classOffset);
866    if (pNumMaps != NULL)
867        *pNumMaps = pMethodPool->methodCount;
868    return pMethodPool->methodData;
869}
870
871/*
872 * This advances "*pPtr" and returns its original value.
873 */
874const RegisterMap* dvmRegisterMapGetNext(const void** pPtr)
875{
876    const RegisterMap* pMap = (const RegisterMap*) *pPtr;
877
878    *pPtr = /*align32*/(((u1*) pMap) + computeRegisterMapSize(pMap));
879    LOGVV("getNext: %p -> %p (f=%#x w=%d e=%d)",
880        pMap, *pPtr, pMap->format, pMap->regWidth,
881        dvmRegisterMapGetNumEntries(pMap));
882    return pMap;
883}
884
885
886/*
887 * ===========================================================================
888 *      Utility functions
889 * ===========================================================================
890 */
891
892/*
893 * Return the data for the specified address, or NULL if not found.
894 *
895 * The result must be released with dvmReleaseRegisterMapLine().
896 */
897const u1* dvmRegisterMapGetLine(const RegisterMap* pMap, int addr)
898{
899    int addrWidth, lineWidth;
900    u1 format = dvmRegisterMapGetFormat(pMap);
901    u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
902
903    assert(numEntries > 0);
904
905    switch (format) {
906    case kRegMapFormatNone:
907        return NULL;
908    case kRegMapFormatCompact8:
909        addrWidth = 1;
910        break;
911    case kRegMapFormatCompact16:
912        addrWidth = 2;
913        break;
914    default:
915        ALOGE("Unknown format %d", format);
916        dvmAbort();
917        return NULL;
918    }
919
920    lineWidth = addrWidth + pMap->regWidth;
921
922    /*
923     * Find the appropriate entry.  Many maps are very small, some are very
924     * large.
925     */
926    static const int kSearchThreshold = 8;
927    const u1* data = NULL;
928    int lineAddr;
929
930    if (numEntries < kSearchThreshold) {
931        int i;
932        data = pMap->data;
933        for (i = numEntries; i > 0; i--) {
934            lineAddr = data[0];
935            if (addrWidth > 1)
936                lineAddr |= data[1] << 8;
937            if (lineAddr == addr)
938                return data + addrWidth;
939
940            data += lineWidth;
941        }
942        assert(data == pMap->data + lineWidth * numEntries);
943    } else {
944        int hi, lo, mid;
945
946        lo = 0;
947        hi = numEntries -1;
948
949        while (hi >= lo) {
950            mid = (hi + lo) / 2;
951            data = pMap->data + lineWidth * mid;
952
953            lineAddr = data[0];
954            if (addrWidth > 1)
955                lineAddr |= data[1] << 8;
956
957            if (addr > lineAddr) {
958                lo = mid + 1;
959            } else if (addr < lineAddr) {
960                hi = mid - 1;
961            } else {
962                return data + addrWidth;
963            }
964        }
965    }
966
967    return NULL;
968}
969
970/*
971 * Compare two register maps.
972 *
973 * Returns 0 if they're equal, nonzero if not.
974 */
975static int compareMaps(const RegisterMap* pMap1, const RegisterMap* pMap2)
976{
977    size_t size1, size2;
978
979    size1 = computeRegisterMapSize(pMap1);
980    size2 = computeRegisterMapSize(pMap2);
981    if (size1 != size2) {
982        ALOGI("compareMaps: size mismatch (%zd vs %zd)", size1, size2);
983        return -1;
984    }
985
986    if (memcmp(pMap1, pMap2, size1) != 0) {
987        ALOGI("compareMaps: content mismatch");
988        return -1;
989    }
990
991    return 0;
992}
993
994
995/*
996 * Get the expanded form of the register map associated with the method.
997 *
998 * If the map is already in one of the uncompressed formats, we return
999 * immediately.  Otherwise, we expand the map and replace method's register
1000 * map pointer, freeing it if it was allocated on the heap.
1001 *
1002 * NOTE: this function is not synchronized; external locking is mandatory
1003 * (unless we're in the zygote, where single-threaded access is guaranteed).
1004 */
1005const RegisterMap* dvmGetExpandedRegisterMap0(Method* method)
1006{
1007    const RegisterMap* curMap = method->registerMap;
1008    RegisterMap* newMap;
1009
1010    if (curMap == NULL)
1011        return NULL;
1012
1013    /* sanity check to ensure this isn't called w/o external locking */
1014    /* (if we use this at a time other than during GC, fix/remove this test) */
1015    if (true) {
1016        if (!gDvm.zygote && dvmTryLockMutex(&gDvm.gcHeapLock) == 0) {
1017            ALOGE("GLITCH: dvmGetExpandedRegisterMap not called at GC time");
1018            dvmAbort();
1019        }
1020    }
1021
1022    RegisterMapFormat format = dvmRegisterMapGetFormat(curMap);
1023    switch (format) {
1024    case kRegMapFormatCompact8:
1025    case kRegMapFormatCompact16:
1026        if (REGISTER_MAP_VERBOSE) {
1027            if (dvmRegisterMapGetOnHeap(curMap)) {
1028                ALOGD("RegMap: already expanded: %s.%s",
1029                    method->clazz->descriptor, method->name);
1030            } else {
1031                ALOGD("RegMap: stored w/o compression: %s.%s",
1032                    method->clazz->descriptor, method->name);
1033            }
1034        }
1035        return curMap;
1036    case kRegMapFormatDifferential:
1037        newMap = uncompressMapDifferential(curMap);
1038        break;
1039    default:
1040        ALOGE("Unknown format %d in dvmGetExpandedRegisterMap", format);
1041        dvmAbort();
1042        newMap = NULL;      // make gcc happy
1043    }
1044
1045    if (newMap == NULL) {
1046        ALOGE("Map failed to uncompress (fmt=%d) %s.%s",
1047            format, method->clazz->descriptor, method->name);
1048        return NULL;
1049    }
1050
1051#ifdef REGISTER_MAP_STATS
1052    /*
1053     * Gather and display some stats.
1054     */
1055    {
1056        MapStats* pStats = (MapStats*) gDvm.registerMapStats;
1057        pStats->numExpandedMaps++;
1058        pStats->totalExpandedMapSize += computeRegisterMapSize(newMap);
1059        ALOGD("RMAP: count=%d size=%d",
1060            pStats->numExpandedMaps, pStats->totalExpandedMapSize);
1061    }
1062#endif
1063
1064    IF_ALOGV() {
1065        char* desc = dexProtoCopyMethodDescriptor(&method->prototype);
1066        ALOGV("Expanding map -> %s.%s:%s",
1067            method->clazz->descriptor, method->name, desc);
1068        free(desc);
1069    }
1070
1071    /*
1072     * Update method, and free compressed map if it was sitting on the heap.
1073     */
1074    dvmSetRegisterMap(method, newMap);
1075
1076    if (dvmRegisterMapGetOnHeap(curMap))
1077        dvmFreeRegisterMap((RegisterMap*) curMap);
1078
1079    return newMap;
1080}
1081
1082
1083/*
1084 * ===========================================================================
1085 *      Map compression
1086 * ===========================================================================
1087 */
1088
1089/*
1090Notes on map compression
1091
1092The idea is to create a compressed form that will be uncompressed before
1093use, with the output possibly saved in a cache.  This means we can use an
1094approach that is unsuited for random access if we choose.
1095
1096In the event that a map simply does not work with our compression scheme,
1097it's reasonable to store the map without compression.  In the future we
1098may want to have more than one compression scheme, and try each in turn,
1099retaining the best.  (We certainly want to keep the uncompressed form if it
1100turns out to be smaller or even slightly larger than the compressed form.)
1101
1102Each entry consists of an address and a bit vector.  Adjacent entries are
1103strongly correlated, suggesting differential encoding.
1104
1105
1106Ideally we would avoid outputting adjacent entries with identical
1107bit vectors.  However, the register values at a given address do not
1108imply anything about the set of valid registers at subsequent addresses.
1109We therefore cannot omit an entry.
1110
1111  If the thread stack has a PC at an address without a corresponding
1112  entry in the register map, we must conservatively scan the registers in
1113  that thread.  This can happen when single-stepping in the debugger,
1114  because the debugger is allowed to invoke arbitrary methods when
1115  a thread is stopped at a breakpoint.  If we can guarantee that a GC
1116  thread scan will never happen while the debugger has that thread stopped,
1117  then we can lift this restriction and simply omit entries that don't
1118  change the bit vector from its previous state.
1119
1120Each entry advances the address value by at least 1 (measured in 16-bit
1121"code units").  Looking at the bootclasspath entries, advancing by 2 units
1122is most common.  Advances by 1 unit are far less common than advances by
11232 units, but more common than 5, and things fall off rapidly.  Gaps of
1124up to 220 code units appear in some computationally intensive bits of code,
1125but are exceedingly rare.
1126
1127If we sum up the number of transitions in a couple of ranges in framework.jar:
1128  [1,4]: 188998 of 218922 gaps (86.3%)
1129  [1,7]: 211647 of 218922 gaps (96.7%)
1130Using a 3-bit delta, with one value reserved as an escape code, should
1131yield good results for the address.
1132
1133These results would change dramatically if we reduced the set of GC
1134points by e.g. removing instructions like integer divide that are only
1135present because they can throw and cause an allocation.
1136
1137We also need to include an "initial gap", because the first few instructions
1138in a method may not be GC points.
1139
1140
1141By observation, many entries simply repeat the previous bit vector, or
1142change only one or two bits.  (This is with type-precise information;
1143the rate of change of bits will be different if live-precise information
1144is factored in).
1145
1146Looking again at adjacent entries in framework.jar:
1147  0 bits changed: 63.0%
1148  1 bit changed: 32.2%
1149After that it falls off rapidly, e.g. the number of entries with 2 bits
1150changed is usually less than 1/10th of the number of entries with 1 bit
1151changed.  A solution that allows us to encode 0- or 1- bit changes
1152efficiently will do well.
1153
1154We still need to handle cases where a large number of bits change.  We
1155probably want a way to drop in a full copy of the bit vector when it's
1156smaller than the representation of multiple bit changes.
1157
1158
1159The bit-change information can be encoded as an index that tells the
1160decoder to toggle the state.  We want to encode the index in as few bits
1161as possible, but we need to allow for fairly wide vectors (e.g. we have a
1162method with 175 registers).  We can deal with this in a couple of ways:
1163(1) use an encoding that assumes few registers and has an escape code
1164for larger numbers of registers; or (2) use different encodings based
1165on how many total registers the method has.  The choice depends to some
1166extent on whether methods with large numbers of registers tend to modify
1167the first 16 regs more often than the others.
1168
1169The last N registers hold method arguments.  If the bytecode is expected
1170to be examined in a debugger, "dx" ensures that the contents of these
1171registers won't change.  Depending upon the encoding format, we may be
1172able to take advantage of this.  We still have to encode the initial
1173state, but we know we'll never have to output a bit change for the last
1174N registers.
1175
1176Considering only methods with 16 or more registers, the "target octant"
1177for register changes looks like this:
1178  [ 43.1%, 16.4%, 6.5%, 6.2%, 7.4%, 8.8%, 9.7%, 1.8% ]
1179As expected, there are fewer changes at the end of the list where the
1180arguments are kept, and more changes at the start of the list because
1181register values smaller than 16 can be used in compact Dalvik instructions
1182and hence are favored for frequently-used values.  In general, the first
1183octant is considerably more active than later entries, the last octant
1184is much less active, and the rest are all about the same.
1185
1186Looking at all bit changes in all methods, 94% are to registers 0-15.  The
1187encoding will benefit greatly by favoring the low-numbered registers.
1188
1189
1190Some of the smaller methods have identical maps, and space could be
1191saved by simply including a pointer to an earlier definition.  This would
1192be best accomplished by specifying a "pointer" format value, followed by
1193a 3-byte (or ULEB128) offset.  Implementing this would probably involve
1194generating a hash value for each register map and maintaining a hash table.
1195
1196In some cases there are repeating patterns in the bit vector that aren't
1197adjacent.  These could benefit from a dictionary encoding.  This doesn't
1198really become useful until the methods reach a certain size though,
1199and managing the dictionary may incur more overhead than we want.
1200
1201Large maps can be compressed significantly.  The trouble is that, when
1202we need to use them, we have to uncompress them onto the heap.  We may
1203get a better trade-off between storage size and heap usage by refusing to
1204compress large maps, so that they can be memory mapped and used directly.
1205(OTOH, only about 2% of the maps will ever actually be used.)
1206
1207
1208----- differential format -----
1209
1210// common header
1211+00 1B format
1212+01 1B regWidth
1213+02 2B numEntries (little-endian)
1214+04 nB length in bytes of the data that follows, in ULEB128 format
1215       (not strictly necessary; allows determination of size w/o full parse)
1216+05+ 1B initial address (0-127), high bit set if max addr >= 256
1217+06+ nB initial value for bit vector
1218
1219// for each entry
1220+00: CCCCBAAA
1221
1222  AAA: address difference.  Values from 0 to 6 indicate an increment of 1
1223  to 7.  A value of 7 indicates that the address difference is large,
1224  and the next byte is a ULEB128-encoded difference value.
1225
1226  B: determines the meaning of CCCC.
1227
1228  CCCC: if B is 0, this is the number of the bit to toggle (0-15).
1229  If B is 1, this is a count of the number of changed bits (1-14).  A value
1230  of 0 means that no bits were changed, and a value of 15 indicates
1231  that enough bits were changed that it required less space to output
1232  the entire bit vector.
1233
1234+01: (optional) ULEB128-encoded address difference
1235
1236+01+: (optional) one or more ULEB128-encoded bit numbers, OR the entire
1237  bit vector.
1238
1239The most common situation is an entry whose address has changed by 2-4
1240code units, has no changes or just a single bit change, and the changed
1241register is less than 16.  We should therefore be able to encode a large
1242number of entries with a single byte, which is half the size of the
1243Compact8 encoding method.
1244*/
1245
1246/*
1247 * Compute some stats on an uncompressed register map.
1248 */
1249#ifdef REGISTER_MAP_STATS
1250static void computeMapStats(RegisterMap* pMap, const Method* method)
1251{
1252    MapStats* pStats = (MapStats*) gDvm.registerMapStats;
1253    const u1 format = dvmRegisterMapGetFormat(pMap);
1254    const u2 numEntries = dvmRegisterMapGetNumEntries(pMap);
1255    const u1* rawMap = pMap->data;
1256    const u1* prevData = NULL;
1257    int ent, addr, prevAddr = -1;
1258
1259    for (ent = 0; ent < numEntries; ent++) {
1260        switch (format) {
1261        case kRegMapFormatCompact8:
1262            addr = *rawMap++;
1263            break;
1264        case kRegMapFormatCompact16:
1265            addr = *rawMap++;
1266            addr |= (*rawMap++) << 8;
1267            break;
1268        default:
1269            /* shouldn't happen */
1270            ALOGE("GLITCH: bad format (%d)", format);
1271            dvmAbort();
1272        }
1273
1274        const u1* dataStart = rawMap;
1275
1276        pStats->totalGcPointCount++;
1277
1278        /*
1279         * Gather "gap size" stats, i.e. the difference in addresses between
1280         * successive GC points.
1281         */
1282        if (prevData != NULL) {
1283            assert(prevAddr >= 0);
1284            int addrDiff = addr - prevAddr;
1285
1286            if (addrDiff < 0) {
1287                ALOGE("GLITCH: address went backward (0x%04x->0x%04x, %s.%s)",
1288                    prevAddr, addr, method->clazz->descriptor, method->name);
1289            } else if (addrDiff > kMaxGcPointGap) {
1290                if (REGISTER_MAP_VERBOSE) {
1291                    ALOGI("HEY: addrDiff is %d, max %d (0x%04x->0x%04x %s.%s)",
1292                        addrDiff, kMaxGcPointGap, prevAddr, addr,
1293                        method->clazz->descriptor, method->name);
1294                }
1295                /* skip this one */
1296            } else {
1297                pStats->gcPointGap[addrDiff]++;
1298            }
1299            pStats->gcGapCount++;
1300
1301
1302            /*
1303             * Compare bit vectors in adjacent entries.  We want to count
1304             * up the number of bits that differ (to see if we frequently
1305             * change 0 or 1 bits) and get a sense for which part of the
1306             * vector changes the most often (near the start, middle, end).
1307             *
1308             * We only do the vector position quantization if we have at
1309             * least 16 registers in the method.
1310             */
1311            int numDiff = 0;
1312            float div = (float) kNumUpdatePosns / method->registersSize;
1313            int regByte;
1314            for (regByte = 0; regByte < pMap->regWidth; regByte++) {
1315                int prev, cur, bit;
1316
1317                prev = prevData[regByte];
1318                cur = dataStart[regByte];
1319
1320                for (bit = 0; bit < 8; bit++) {
1321                    if (((prev >> bit) & 1) != ((cur >> bit) & 1)) {
1322                        numDiff++;
1323
1324                        int bitNum = regByte * 8 + bit;
1325
1326                        if (bitNum < 16)
1327                            pStats->updateLT16++;
1328                        else
1329                            pStats->updateGE16++;
1330
1331                        if (method->registersSize < 16)
1332                            continue;
1333
1334                        if (bitNum >= method->registersSize) {
1335                            /* stuff off the end should be zero in both */
1336                            ALOGE("WEIRD: bit=%d (%d/%d), prev=%02x cur=%02x",
1337                                bit, regByte, method->registersSize,
1338                                prev, cur);
1339                            assert(false);
1340                        }
1341                        int idx = (int) (bitNum * div);
1342                        if (!(idx >= 0 && idx < kNumUpdatePosns)) {
1343                            ALOGE("FAIL: bitNum=%d (of %d) div=%.3f idx=%d",
1344                                bitNum, method->registersSize, div, idx);
1345                            assert(false);
1346                        }
1347                        pStats->updatePosn[idx]++;
1348                    }
1349                }
1350            }
1351
1352            if (numDiff > kMaxDiffBits) {
1353                if (REGISTER_MAP_VERBOSE) {
1354                    ALOGI("WOW: numDiff is %d, max %d", numDiff, kMaxDiffBits);
1355                }
1356            } else {
1357                pStats->numDiffBits[numDiff]++;
1358            }
1359        }
1360
1361        /* advance to start of next line */
1362        rawMap += pMap->regWidth;
1363
1364        prevAddr = addr;
1365        prevData = dataStart;
1366    }
1367}
1368#endif
1369
1370/*
1371 * Compute the difference between two bit vectors.
1372 *
1373 * If "lebOutBuf" is non-NULL, we output the bit indices in ULEB128 format
1374 * as we go.  Otherwise, we just generate the various counts.
1375 *
1376 * The bit vectors are compared byte-by-byte, so any unused bits at the
1377 * end must be zero.
1378 *
1379 * Returns the number of bytes required to hold the ULEB128 output.
1380 *
1381 * If "pFirstBitChanged" or "pNumBitsChanged" are non-NULL, they will
1382 * receive the index of the first changed bit and the number of changed
1383 * bits, respectively.
1384 */
1385static int computeBitDiff(const u1* bits1, const u1* bits2, int byteWidth,
1386    int* pFirstBitChanged, int* pNumBitsChanged, u1* lebOutBuf)
1387{
1388    int numBitsChanged = 0;
1389    int firstBitChanged = -1;
1390    int lebSize = 0;
1391    int byteNum;
1392
1393    /*
1394     * Run through the vectors, first comparing them at the byte level.  This
1395     * will yield a fairly quick result if nothing has changed between them.
1396     */
1397    for (byteNum = 0; byteNum < byteWidth; byteNum++) {
1398        u1 byte1 = *bits1++;
1399        u1 byte2 = *bits2++;
1400        if (byte1 != byte2) {
1401            /*
1402             * Walk through the byte, identifying the changed bits.
1403             */
1404            int bitNum;
1405            for (bitNum = 0; bitNum < 8; bitNum++) {
1406                if (((byte1 >> bitNum) & 0x01) != ((byte2 >> bitNum) & 0x01)) {
1407                    int bitOffset = (byteNum << 3) + bitNum;
1408
1409                    if (firstBitChanged < 0)
1410                        firstBitChanged = bitOffset;
1411                    numBitsChanged++;
1412
1413                    if (lebOutBuf == NULL) {
1414                        lebSize += unsignedLeb128Size(bitOffset);
1415                    } else {
1416                        u1* curBuf = lebOutBuf;
1417                        lebOutBuf = writeUnsignedLeb128(lebOutBuf, bitOffset);
1418                        lebSize += lebOutBuf - curBuf;
1419                    }
1420                }
1421            }
1422        }
1423    }
1424
1425    if (numBitsChanged > 0)
1426        assert(firstBitChanged >= 0);
1427
1428    if (pFirstBitChanged != NULL)
1429        *pFirstBitChanged = firstBitChanged;
1430    if (pNumBitsChanged != NULL)
1431        *pNumBitsChanged = numBitsChanged;
1432
1433    return lebSize;
1434}
1435
1436/*
1437 * Compress the register map with differential encoding.
1438 *
1439 * "meth" is only needed for debug output.
1440 *
1441 * On success, returns a newly-allocated RegisterMap.  If the map is not
1442 * compatible for some reason, or fails to get smaller, this will return NULL.
1443 */
1444static RegisterMap* compressMapDifferential(const RegisterMap* pMap,
1445    const Method* meth)
1446{
1447    RegisterMap* pNewMap = NULL;
1448    int origSize = computeRegisterMapSize(pMap);
1449    u1* tmpPtr;
1450    int addrWidth, regWidth, numEntries;
1451    bool debug = false;
1452
1453    if (false &&
1454        strcmp(meth->clazz->descriptor, "Landroid/text/StaticLayout;") == 0 &&
1455        strcmp(meth->name, "generate") == 0)
1456    {
1457        debug = true;
1458    }
1459
1460    u1 format = dvmRegisterMapGetFormat(pMap);
1461    switch (format) {
1462    case kRegMapFormatCompact8:
1463        addrWidth = 1;
1464        break;
1465    case kRegMapFormatCompact16:
1466        addrWidth = 2;
1467        break;
1468    default:
1469        ALOGE("ERROR: can't compress map with format=%d", format);
1470        return NULL;
1471    }
1472
1473    regWidth = dvmRegisterMapGetRegWidth(pMap);
1474    numEntries = dvmRegisterMapGetNumEntries(pMap);
1475
1476    if (debug) {
1477        ALOGI("COMPRESS: %s.%s aw=%d rw=%d ne=%d",
1478            meth->clazz->descriptor, meth->name,
1479            addrWidth, regWidth, numEntries);
1480        dumpRegisterMap(pMap, -1);
1481    }
1482
1483    if (numEntries <= 1) {
1484        ALOGV("Can't compress map with 0 or 1 entries");
1485        return NULL;
1486    }
1487
1488    /*
1489     * We don't know how large the compressed data will be.  It's possible
1490     * for it to expand and become larger than the original.  The header
1491     * itself is variable-sized, so we generate everything into a temporary
1492     * buffer and then copy it to form-fitting storage once we know how big
1493     * it will be (and that it's smaller than the original).
1494     *
1495     * If we use a size that is equal to the size of the input map plus
1496     * a value longer than a single entry can possibly expand to, we need
1497     * only check for overflow at the end of each entry.  The worst case
1498     * for a single line is (1 + <ULEB8 address> + <full copy of vector>).
1499     * Addresses are 16 bits, so that's (1 + 3 + regWidth).
1500     *
1501     * The initial address offset and bit vector will take up less than
1502     * or equal to the amount of space required when uncompressed -- large
1503     * initial offsets are rejected.
1504     */
1505    UniquePtr<u1[]> tmpBuf(new u1[origSize + (1 + 3 + regWidth)]);
1506    if (tmpBuf.get() == NULL)
1507        return NULL;
1508
1509    tmpPtr = tmpBuf.get();
1510
1511    const u1* mapData = pMap->data;
1512    const u1* prevBits;
1513    u2 addr, prevAddr;
1514
1515    addr = *mapData++;
1516    if (addrWidth > 1)
1517        addr |= (*mapData++) << 8;
1518
1519    if (addr >= 128) {
1520        ALOGV("Can't compress map with starting address >= 128");
1521        return NULL;
1522    }
1523
1524    /*
1525     * Start by writing the initial address and bit vector data.  The high
1526     * bit of the initial address is used to indicate the required address
1527     * width (which the decoder can't otherwise determine without parsing
1528     * the compressed data).
1529     */
1530    *tmpPtr++ = addr | (addrWidth > 1 ? 0x80 : 0x00);
1531    memcpy(tmpPtr, mapData, regWidth);
1532
1533    prevBits = mapData;
1534    prevAddr = addr;
1535
1536    tmpPtr += regWidth;
1537    mapData += regWidth;
1538
1539    /*
1540     * Loop over all following entries.
1541     */
1542    for (int entry = 1; entry < numEntries; entry++) {
1543        int addrDiff;
1544        u1 key;
1545
1546        /*
1547         * Pull out the address and figure out how to encode it.
1548         */
1549        addr = *mapData++;
1550        if (addrWidth > 1)
1551            addr |= (*mapData++) << 8;
1552
1553        if (debug)
1554            ALOGI(" addr=0x%04x ent=%d (aw=%d)", addr, entry, addrWidth);
1555
1556        addrDiff = addr - prevAddr;
1557        assert(addrDiff > 0);
1558        if (addrDiff < 8) {
1559            /* small difference, encode in 3 bits */
1560            key = addrDiff -1;          /* set 00000AAA */
1561            if (debug)
1562                ALOGI(" : small %d, key=0x%02x", addrDiff, key);
1563        } else {
1564            /* large difference, output escape code */
1565            key = 0x07;                 /* escape code for AAA */
1566            if (debug)
1567                ALOGI(" : large %d, key=0x%02x", addrDiff, key);
1568        }
1569
1570        int numBitsChanged, firstBitChanged, lebSize;
1571
1572        lebSize = computeBitDiff(prevBits, mapData, regWidth,
1573            &firstBitChanged, &numBitsChanged, NULL);
1574
1575        if (debug) {
1576            ALOGI(" : diff fbc=%d nbc=%d ls=%d (rw=%d)",
1577                firstBitChanged, numBitsChanged, lebSize, regWidth);
1578        }
1579
1580        if (numBitsChanged == 0) {
1581            /* set B to 1 and CCCC to zero to indicate no bits were changed */
1582            key |= 0x08;
1583            if (debug) ALOGI(" : no bits changed");
1584        } else if (numBitsChanged == 1 && firstBitChanged < 16) {
1585            /* set B to 0 and CCCC to the index of the changed bit */
1586            key |= firstBitChanged << 4;
1587            if (debug) ALOGI(" : 1 low bit changed");
1588        } else if (numBitsChanged < 15 && lebSize < regWidth) {
1589            /* set B to 1 and CCCC to the number of bits */
1590            key |= 0x08 | (numBitsChanged << 4);
1591            if (debug) ALOGI(" : some bits changed");
1592        } else {
1593            /* set B to 1 and CCCC to 0x0f so we store the entire vector */
1594            key |= 0x08 | 0xf0;
1595            if (debug) ALOGI(" : encode original");
1596        }
1597
1598        /*
1599         * Encode output.  Start with the key, follow with the address
1600         * diff (if it didn't fit in 3 bits), then the changed bit info.
1601         */
1602        *tmpPtr++ = key;
1603        if ((key & 0x07) == 0x07)
1604            tmpPtr = writeUnsignedLeb128(tmpPtr, addrDiff);
1605
1606        if ((key & 0x08) != 0) {
1607            int bitCount = key >> 4;
1608            if (bitCount == 0) {
1609                /* nothing changed, no additional output required */
1610            } else if (bitCount == 15) {
1611                /* full vector is most compact representation */
1612                memcpy(tmpPtr, mapData, regWidth);
1613                tmpPtr += regWidth;
1614            } else {
1615                /* write bit indices in LEB128 format */
1616                (void) computeBitDiff(prevBits, mapData, regWidth,
1617                    NULL, NULL, tmpPtr);
1618                tmpPtr += lebSize;
1619            }
1620        } else {
1621            /* single-bit changed, value encoded in key byte */
1622        }
1623
1624        prevBits = mapData;
1625        prevAddr = addr;
1626        mapData += regWidth;
1627
1628        /*
1629         * See if we've run past the original size.
1630         */
1631        if (tmpPtr - tmpBuf.get() >= origSize) {
1632            if (debug) {
1633                ALOGD("Compressed size >= original (%d vs %d): %s.%s",
1634                    tmpPtr - tmpBuf.get(), origSize,
1635                    meth->clazz->descriptor, meth->name);
1636            }
1637            return NULL;
1638        }
1639    }
1640
1641    /*
1642     * Create a RegisterMap with the contents.
1643     *
1644     * TODO: consider using a threshold other than merely ">=".  We would
1645     * get poorer compression but potentially use less native heap space.
1646     */
1647    static const int kHeaderSize = offsetof(RegisterMap, data);
1648    int newDataSize = tmpPtr - tmpBuf.get();
1649    int newMapSize;
1650
1651    newMapSize = kHeaderSize + unsignedLeb128Size(newDataSize) + newDataSize;
1652    if (newMapSize >= origSize) {
1653        if (debug) {
1654            ALOGD("Final comp size >= original (%d vs %d): %s.%s",
1655                newMapSize, origSize, meth->clazz->descriptor, meth->name);
1656        }
1657        return NULL;
1658    }
1659
1660    pNewMap = (RegisterMap*) malloc(newMapSize);
1661    if (pNewMap == NULL)
1662        return NULL;
1663    dvmRegisterMapSetFormat(pNewMap, kRegMapFormatDifferential);
1664    dvmRegisterMapSetOnHeap(pNewMap, true);
1665    dvmRegisterMapSetRegWidth(pNewMap, regWidth);
1666    dvmRegisterMapSetNumEntries(pNewMap, numEntries);
1667
1668    tmpPtr = pNewMap->data;
1669    tmpPtr = writeUnsignedLeb128(tmpPtr, newDataSize);
1670    memcpy(tmpPtr, tmpBuf.get(), newDataSize);
1671
1672    if (REGISTER_MAP_VERBOSE) {
1673        ALOGD("Compression successful (%d -> %d) from aw=%d rw=%d ne=%d",
1674            computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap),
1675            addrWidth, regWidth, numEntries);
1676    }
1677
1678    return pNewMap;
1679}
1680
1681/*
1682 * Toggle the value of the "idx"th bit in "ptr".
1683 */
1684static inline void toggleBit(u1* ptr, int idx)
1685{
1686    ptr[idx >> 3] ^= 1 << (idx & 0x07);
1687}
1688
1689/*
1690 * Expand a compressed map to an uncompressed form.
1691 *
1692 * Returns a newly-allocated RegisterMap on success, or NULL on failure.
1693 *
1694 * TODO: consider using the linear allocator or a custom allocator with
1695 * LRU replacement for these instead of the native heap.
1696 */
1697static RegisterMap* uncompressMapDifferential(const RegisterMap* pMap)
1698{
1699    static const int kHeaderSize = offsetof(RegisterMap, data);
1700    u1 format = dvmRegisterMapGetFormat(pMap);
1701    RegisterMapFormat newFormat;
1702    int regWidth, numEntries, newAddrWidth, newMapSize;
1703
1704    if (format != kRegMapFormatDifferential) {
1705        ALOGE("Not differential (%d)", format);
1706        return NULL;
1707    }
1708
1709    regWidth = dvmRegisterMapGetRegWidth(pMap);
1710    numEntries = dvmRegisterMapGetNumEntries(pMap);
1711
1712    /* get the data size; we can check this at the end */
1713    const u1* srcPtr = pMap->data;
1714    int expectedSrcLen = readUnsignedLeb128(&srcPtr);
1715    const u1* srcStart = srcPtr;
1716
1717    /* get the initial address and the 16-bit address flag */
1718    int addr = *srcPtr & 0x7f;
1719    if ((*srcPtr & 0x80) == 0) {
1720        newFormat = kRegMapFormatCompact8;
1721        newAddrWidth = 1;
1722    } else {
1723        newFormat = kRegMapFormatCompact16;
1724        newAddrWidth = 2;
1725    }
1726    srcPtr++;
1727
1728    /* now we know enough to allocate the new map */
1729    if (REGISTER_MAP_VERBOSE) {
1730        ALOGI("Expanding to map aw=%d rw=%d ne=%d",
1731            newAddrWidth, regWidth, numEntries);
1732    }
1733    newMapSize = kHeaderSize + (newAddrWidth + regWidth) * numEntries;
1734    RegisterMap* pNewMap = (RegisterMap*) malloc(newMapSize);
1735
1736    if (pNewMap == NULL)
1737      return NULL;
1738
1739    dvmRegisterMapSetFormat(pNewMap, newFormat);
1740    dvmRegisterMapSetOnHeap(pNewMap, true);
1741    dvmRegisterMapSetRegWidth(pNewMap, regWidth);
1742    dvmRegisterMapSetNumEntries(pNewMap, numEntries);
1743
1744    /*
1745     * Write the start address and initial bits to the new map.
1746     */
1747    u1* dstPtr = pNewMap->data;
1748
1749    *dstPtr++ = addr & 0xff;
1750    if (newAddrWidth > 1)
1751        *dstPtr++ = (u1) (addr >> 8);
1752
1753    memcpy(dstPtr, srcPtr, regWidth);
1754
1755    int prevAddr = addr;
1756    const u1* prevBits = dstPtr;    /* point at uncompressed data */
1757
1758    dstPtr += regWidth;
1759    srcPtr += regWidth;
1760
1761    /*
1762     * Walk through, uncompressing one line at a time.
1763     */
1764    int entry;
1765    for (entry = 1; entry < numEntries; entry++) {
1766        int addrDiff;
1767        u1 key;
1768
1769        key = *srcPtr++;
1770
1771        /* get the address */
1772        if ((key & 0x07) == 7) {
1773            /* address diff follows in ULEB128 */
1774            addrDiff = readUnsignedLeb128(&srcPtr);
1775        } else {
1776            addrDiff = (key & 0x07) +1;
1777        }
1778
1779        addr = prevAddr + addrDiff;
1780        *dstPtr++ = addr & 0xff;
1781        if (newAddrWidth > 1)
1782            *dstPtr++ = (u1) (addr >> 8);
1783
1784        /* unpack the bits */
1785        if ((key & 0x08) != 0) {
1786            int bitCount = (key >> 4);
1787            if (bitCount == 0) {
1788                /* no bits changed, just copy previous */
1789                memcpy(dstPtr, prevBits, regWidth);
1790            } else if (bitCount == 15) {
1791                /* full copy of bit vector is present; ignore prevBits */
1792                memcpy(dstPtr, srcPtr, regWidth);
1793                srcPtr += regWidth;
1794            } else {
1795                /* copy previous bits and modify listed indices */
1796                memcpy(dstPtr, prevBits, regWidth);
1797                while (bitCount--) {
1798                    int bitIndex = readUnsignedLeb128(&srcPtr);
1799                    toggleBit(dstPtr, bitIndex);
1800                }
1801            }
1802        } else {
1803            /* copy previous bits and modify the specified one */
1804            memcpy(dstPtr, prevBits, regWidth);
1805
1806            /* one bit, from 0-15 inclusive, was changed */
1807            toggleBit(dstPtr, key >> 4);
1808        }
1809
1810        prevAddr = addr;
1811        prevBits = dstPtr;
1812        dstPtr += regWidth;
1813    }
1814
1815    if (dstPtr - (u1*) pNewMap != newMapSize) {
1816        ALOGE("ERROR: output %d bytes, expected %d",
1817            dstPtr - (u1*) pNewMap, newMapSize);
1818        free(pNewMap);
1819        return NULL;
1820    }
1821
1822    if (srcPtr - srcStart != expectedSrcLen) {
1823        ALOGE("ERROR: consumed %d bytes, expected %d",
1824            srcPtr - srcStart, expectedSrcLen);
1825        free(pNewMap);
1826        return NULL;
1827    }
1828
1829    if (REGISTER_MAP_VERBOSE) {
1830        ALOGD("Expansion successful (%d -> %d)",
1831            computeRegisterMapSize(pMap), computeRegisterMapSize(pNewMap));
1832    }
1833
1834    return pNewMap;
1835}
1836