VfyBasicBlock.cpp revision 062bf509a77fce9dfcb7e7b2e401cf2a124d83d5
1/*
2 * Copyright (C) 2010 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Verifier basic block functions.
19 */
20#include "Dalvik.h"
21#include "analysis/VfyBasicBlock.h"
22#include "analysis/CodeVerify.h"
23#include "analysis/VerifySubs.h"
24#include "libdex/DexCatch.h"
25#include "libdex/InstrUtils.h"
26
27
28/*
29 * Extract the list of catch handlers from "pTry" into "addrBuf".
30 *
31 * Returns the size of the catch handler list.  If the return value
32 * exceeds "addrBufSize", the items at the end of the list will not be
33 * represented in the output array, and this function should be called
34 * again with a larger buffer.
35 */
36static u4 extractCatchHandlers(const DexCode* pCode, const DexTry* pTry,
37    u4* addrBuf, size_t addrBufSize)
38{
39    DexCatchIterator iterator;
40    unsigned int idx = 0;
41
42    dexCatchIteratorInit(&iterator, pCode, pTry->handlerOff);
43    while (true) {
44        DexCatchHandler* handler = dexCatchIteratorNext(&iterator);
45        if (handler == NULL)
46            break;
47
48        if (idx < addrBufSize) {
49            addrBuf[idx] = handler->address;
50        }
51        idx++;
52    }
53
54    return idx;
55}
56
57/*
58 * Returns "true" if the instruction represents a data chunk, such as a
59 * switch statement block.
60 */
61static bool isDataChunk(u2 insn)
62{
63    return (insn == kPackedSwitchSignature ||
64            insn == kSparseSwitchSignature ||
65            insn == kArrayDataSignature);
66}
67
68/*
69 * Alloc a basic block in the specified slot.  The storage will be
70 * initialized.
71 */
72static VfyBasicBlock* allocVfyBasicBlock(VerifierData* vdata, u4 idx)
73{
74    VfyBasicBlock* newBlock = (VfyBasicBlock*) calloc(1, sizeof(VfyBasicBlock));
75    if (newBlock == NULL)
76        return NULL;
77
78    /*
79     * TODO: there is no good default size here -- the problem is that most
80     * addresses will only have one predecessor, but a fair number will
81     * have 10+, and a few will have 100+ (e.g. the synthetic "finally"
82     * in a large synchronized method).  We probably want to use a small
83     * base allocation (perhaps two) and then have the first overflow
84     * allocation jump dramatically (to 32 or thereabouts).
85     */
86    newBlock->predecessors = dvmPointerSetAlloc(32);
87    if (newBlock->predecessors == NULL) {
88        free(newBlock);
89        return NULL;
90    }
91
92    newBlock->firstAddr = (u4) -1;      // DEBUG
93
94    newBlock->liveRegs = dvmAllocBitVector(vdata->insnRegCount, false);
95    if (newBlock->liveRegs == NULL) {
96        dvmPointerSetFree(newBlock->predecessors);
97        free(newBlock);
98        return NULL;
99    }
100
101    return newBlock;
102}
103
104/*
105 * Add "curBlock" to the predecessor list in "targetIdx".
106 */
107static bool addToPredecessor(VerifierData* vdata, VfyBasicBlock* curBlock,
108    u4 targetIdx)
109{
110    assert(targetIdx < vdata->insnsSize);
111
112    /*
113     * Allocate the target basic block if necessary.  This will happen
114     * on e.g. forward branches.
115     *
116     * We can't fill in all the fields, but that will happen automatically
117     * when we get to that part of the code.
118     */
119    VfyBasicBlock* targetBlock = vdata->basicBlocks[targetIdx];
120    if (targetBlock == NULL) {
121        targetBlock = allocVfyBasicBlock(vdata, targetIdx);
122        if (targetBlock == NULL)
123            return false;
124        vdata->basicBlocks[targetIdx] = targetBlock;
125    }
126
127    PointerSet* preds = targetBlock->predecessors;
128    bool added = dvmPointerSetAddEntry(preds, curBlock);
129    if (!added) {
130        /*
131         * This happens sometimes for packed-switch instructions, where
132         * the same target address appears more than once.  Also, a
133         * (pointless) conditional branch to the next instruction will
134         * trip over this.
135         */
136        ALOGV("ODD: point set for targ=0x%04x (%p) already had block "
137             "fir=0x%04x (%p)",
138            targetIdx, targetBlock, curBlock->firstAddr, curBlock);
139    }
140
141    return true;
142}
143
144/*
145 * Add ourselves to the predecessor list in all blocks we might transfer
146 * control to.
147 *
148 * There are four ways to proceed to a new instruction:
149 *  (1) continue to the following instruction
150 *  (2) [un]conditionally branch to a specific location
151 *  (3) conditionally branch through a "switch" statement
152 *  (4) throw an exception
153 *
154 * Returning from the method (via a return statement or an uncaught
155 * exception) are not interesting for liveness analysis.
156 */
157static bool setPredecessors(VerifierData* vdata, VfyBasicBlock* curBlock,
158    u4 curIdx, OpcodeFlags opFlags, u4 nextIdx, u4* handlerList,
159    size_t numHandlers)
160{
161    const InsnFlags* insnFlags = vdata->insnFlags;
162    const Method* meth = vdata->method;
163
164    unsigned int handlerIdx;
165    for (handlerIdx = 0; handlerIdx < numHandlers; handlerIdx++) {
166        if (!addToPredecessor(vdata, curBlock, handlerList[handlerIdx]))
167            return false;
168    }
169
170    if ((opFlags & kInstrCanContinue) != 0) {
171        if (!addToPredecessor(vdata, curBlock, nextIdx))
172            return false;
173    }
174    if ((opFlags & kInstrCanBranch) != 0) {
175        bool unused, gotBranch;
176        s4 branchOffset, absOffset;
177
178        gotBranch = dvmGetBranchOffset(meth, insnFlags, curIdx,
179                &branchOffset, &unused);
180        assert(gotBranch);
181        absOffset = curIdx + branchOffset;
182        assert(absOffset >= 0 && (u4) absOffset < vdata->insnsSize);
183
184        if (!addToPredecessor(vdata, curBlock, absOffset))
185            return false;
186    }
187
188    if ((opFlags & kInstrCanSwitch) != 0) {
189        const u2* curInsn = &meth->insns[curIdx];
190        const u2* dataPtr;
191
192        /* these values have already been verified, so we can trust them */
193        s4 offsetToData = curInsn[1] | ((s4) curInsn[2]) << 16;
194        dataPtr = curInsn + offsetToData;
195
196        /*
197         * dataPtr points to the start of the switch data.  The first
198         * item is the NOP+magic, the second is the number of entries in
199         * the switch table.
200         */
201        u2 switchCount = dataPtr[1];
202
203        /*
204         * Skip past the ident field, size field, and the first_key field
205         * (for packed) or the key list (for sparse).
206         */
207        if (dexOpcodeFromCodeUnit(meth->insns[curIdx]) == OP_PACKED_SWITCH) {
208            dataPtr += 4;
209        } else {
210            assert(dexOpcodeFromCodeUnit(meth->insns[curIdx]) ==
211                    OP_SPARSE_SWITCH);
212            dataPtr += 2 + 2 * switchCount;
213        }
214
215        u4 switchIdx;
216        for (switchIdx = 0; switchIdx < switchCount; switchIdx++) {
217            s4 offset, absOffset;
218
219            offset = (s4) dataPtr[switchIdx*2] |
220                     (s4) (dataPtr[switchIdx*2 +1] << 16);
221            absOffset = curIdx + offset;
222            assert(absOffset >= 0 && (u4) absOffset < vdata->insnsSize);
223
224            if (!addToPredecessor(vdata, curBlock, absOffset))
225                return false;
226        }
227    }
228
229    if (false) {
230        if (dvmPointerSetGetCount(curBlock->predecessors) > 256) {
231            LOGI("Lots of preds at 0x%04x in %s.%s:%s", curIdx,
232                meth->clazz->descriptor, meth->name, meth->shorty);
233        }
234    }
235
236    return true;
237}
238
239/*
240 * Dump the contents of the basic blocks.
241 */
242static void dumpBasicBlocks(const VerifierData* vdata)
243{
244    char printBuf[256];
245    unsigned int idx;
246    int count;
247
248    LOGI("Basic blocks for %s.%s:%s", vdata->method->clazz->descriptor,
249        vdata->method->name, vdata->method->shorty);
250    for (idx = 0; idx < vdata->insnsSize; idx++) {
251        VfyBasicBlock* block = vdata->basicBlocks[idx];
252        if (block == NULL)
253            continue;
254
255        assert(block->firstAddr == idx);
256        count = snprintf(printBuf, sizeof(printBuf), " %04x-%04x ",
257            block->firstAddr, block->lastAddr);
258
259        PointerSet* preds = block->predecessors;
260        size_t numPreds = dvmPointerSetGetCount(preds);
261
262        if (numPreds > 0) {
263            count += snprintf(printBuf + count, sizeof(printBuf) - count,
264                    "preds:");
265
266            unsigned int predIdx;
267            for (predIdx = 0; predIdx < numPreds; predIdx++) {
268                if (count >= (int) sizeof(printBuf))
269                    break;
270                const VfyBasicBlock* pred =
271                    (const VfyBasicBlock*) dvmPointerSetGetEntry(preds, predIdx);
272                count += snprintf(printBuf + count, sizeof(printBuf) - count,
273                        "%04x(%p),", pred->firstAddr, pred);
274            }
275        } else {
276            count += snprintf(printBuf + count, sizeof(printBuf) - count,
277                    "(no preds)");
278        }
279
280        printBuf[sizeof(printBuf)-2] = '!';
281        printBuf[sizeof(printBuf)-1] = '\0';
282
283        LOGI("%s", printBuf);
284    }
285
286    usleep(100 * 1000);      /* ugh...let logcat catch up */
287}
288
289
290/*
291 * Generate a list of basic blocks and related information.
292 *
293 * On success, returns "true" with vdata->basicBlocks initialized.
294 */
295bool dvmComputeVfyBasicBlocks(VerifierData* vdata)
296{
297    const InsnFlags* insnFlags = vdata->insnFlags;
298    const Method* meth = vdata->method;
299    const u4 insnsSize = vdata->insnsSize;
300    const DexCode* pCode = dvmGetMethodCode(meth);
301    const DexTry* pTries = NULL;
302    const size_t kHandlerStackAllocSize = 16;   /* max seen so far is 7 */
303    u4 handlerAddrs[kHandlerStackAllocSize];
304    u4* handlerListAlloc = NULL;
305    u4* handlerList = NULL;
306    size_t numHandlers = 0;
307    u4 idx, blockStartAddr;
308    bool result = false;
309
310    bool verbose = false; //dvmWantVerboseVerification(meth);
311    if (verbose) {
312        LOGI("Basic blocks for %s.%s:%s",
313            meth->clazz->descriptor, meth->name, meth->shorty);
314    }
315
316    /*
317     * Allocate a data structure that allows us to map from an address to
318     * the corresponding basic block.  Initially all pointers are NULL.
319     * They are populated on demand as we proceed (either when we reach a
320     * new BB, or when we need to add an item to the predecessor list in
321     * a not-yet-reached BB).
322     *
323     * Only the first instruction in the block points to the BB structure;
324     * the rest remain NULL.
325     */
326    vdata->basicBlocks =
327        (VfyBasicBlock**) calloc(insnsSize, sizeof(VfyBasicBlock*));
328    if (vdata->basicBlocks == NULL)
329      return false;
330
331    /*
332     * The "tries" list is a series of non-overlapping regions with a list
333     * of "catch" handlers.  Rather than do the "find a matching try block"
334     * computation at each step, we just walk the "try" list in parallel.
335     *
336     * Not all methods have "try" blocks.  If this one does, we init tryEnd
337     * to zero, so that the (exclusive bound) range check trips immediately.
338     */
339    u4 tryIndex = 0, tryStart = 0, tryEnd = 0;
340    if (pCode->triesSize != 0) {
341        pTries = dexGetTries(pCode);
342    }
343
344    u4 debugBBIndex = 0;
345
346    /*
347     * The address associated with a basic block is the start address.
348     */
349    blockStartAddr = 0;
350
351    for (idx = 0; idx < insnsSize; ) {
352        /*
353         * Make sure we're pointing at the right "try" block.  It should
354         * not be possible to "jump over" a block, so if we're no longer
355         * in the correct one we can just advance to the next.
356         */
357        if (pTries != NULL && idx >= tryEnd) {
358            if (tryIndex == pCode->triesSize) {
359                /* no more try blocks in this method */
360                pTries = NULL;
361                numHandlers = 0;
362            } else {
363                /*
364                 * Extract the set of handlers.  We want to avoid doing
365                 * this for each block, so we copy them to local storage.
366                 * If it doesn't fit in the small stack area, we'll use
367                 * the heap instead.
368                 *
369                 * It's rare to encounter a method with more than half a
370                 * dozen possible handlers.
371                 */
372                tryStart = pTries[tryIndex].startAddr;
373                tryEnd = tryStart + pTries[tryIndex].insnCount;
374
375                if (handlerListAlloc != NULL) {
376                    free(handlerListAlloc);
377                    handlerListAlloc = NULL;
378                }
379                numHandlers = extractCatchHandlers(pCode, &pTries[tryIndex],
380                    handlerAddrs, kHandlerStackAllocSize);
381                assert(numHandlers > 0);    // TODO make sure this is verified
382                if (numHandlers <= kHandlerStackAllocSize) {
383                    handlerList = handlerAddrs;
384                } else {
385                    ALOGD("overflow, numHandlers=%d", numHandlers);
386                    handlerListAlloc = (u4*) malloc(sizeof(u4) * numHandlers);
387                    if (handlerListAlloc == NULL)
388                        return false;
389                    extractCatchHandlers(pCode, &pTries[tryIndex],
390                        handlerListAlloc, numHandlers);
391                    handlerList = handlerListAlloc;
392                }
393
394                ALOGV("+++ start=%x end=%x numHan=%d",
395                    tryStart, tryEnd, numHandlers);
396
397                tryIndex++;
398            }
399        }
400
401        /*
402         * Check the current instruction, and possibly aspects of the
403         * next instruction, to see if this instruction ends the current
404         * basic block.
405         *
406         * Instructions that can throw only end the block if there is the
407         * possibility of a local handler catching the exception.
408         */
409        Opcode opcode = dexOpcodeFromCodeUnit(meth->insns[idx]);
410        OpcodeFlags opFlags = dexGetFlagsFromOpcode(opcode);
411        size_t nextIdx = idx + dexGetWidthFromInstruction(&meth->insns[idx]);
412        bool endBB = false;
413        bool ignoreInstr = false;
414
415        if ((opFlags & kInstrCanContinue) == 0) {
416            /* does not continue */
417            endBB = true;
418        } else if ((opFlags & (kInstrCanBranch | kInstrCanSwitch)) != 0) {
419            /* conditionally branches elsewhere */
420            endBB = true;
421        } else if ((opFlags & kInstrCanThrow) != 0 &&
422                dvmInsnIsInTry(insnFlags, idx))
423        {
424            /* throws an exception that might be caught locally */
425            endBB = true;
426        } else if (isDataChunk(meth->insns[idx])) {
427            /*
428             * If this is a data chunk (e.g. switch data) we want to skip
429             * over it entirely.  Set endBB so we don't carry this along as
430             * the start of a block, and ignoreInstr so we don't try to
431             * open a basic block for this instruction.
432             */
433            endBB = ignoreInstr = true;
434        } else if (dvmInsnIsBranchTarget(insnFlags, nextIdx)) {
435            /*
436             * We also need to end it if the next instruction is a branch
437             * target.  Note we've tagged exception catch blocks as such.
438             *
439             * If we're this far along in the "else" chain, we know that
440             * this isn't a data-chunk NOP, and control can continue to
441             * the next instruction, so we're okay examining "nextIdx".
442             */
443            assert(nextIdx < insnsSize);
444            endBB = true;
445        } else if (opcode == OP_NOP && isDataChunk(meth->insns[nextIdx])) {
446            /*
447             * Handle an odd special case: if this is NOP padding before a
448             * data chunk, also treat it as "ignore".  Otherwise it'll look
449             * like a block that starts and doesn't end.
450             */
451            endBB = ignoreInstr = true;
452        } else {
453            /* check: return ops should be caught by absence of can-continue */
454            assert((opFlags & kInstrCanReturn) == 0);
455        }
456
457        if (verbose) {
458            char btc = dvmInsnIsBranchTarget(insnFlags, idx) ? '>' : ' ';
459            char tryc =
460                (pTries != NULL && idx >= tryStart && idx < tryEnd) ? 't' : ' ';
461            bool startBB = (idx == blockStartAddr);
462            const char* startEnd;
463
464
465            if (ignoreInstr)
466                startEnd = "IGNORE";
467            else if (startBB && endBB)
468                startEnd = "START/END";
469            else if (startBB)
470                startEnd = "START";
471            else if (endBB)
472                startEnd = "END";
473            else
474                startEnd = "-";
475
476            LOGI("%04x: %c%c%s #%d", idx, tryc, btc, startEnd, debugBBIndex);
477
478            if (pTries != NULL && idx == tryStart) {
479                assert(numHandlers > 0);
480                LOGI("  EXC block: [%04x, %04x) %d:(%04x...)",
481                    tryStart, tryEnd, numHandlers, handlerList[0]);
482            }
483        }
484
485        if (idx != blockStartAddr) {
486            /* should not be a basic block struct associated with this addr */
487            assert(vdata->basicBlocks[idx] == NULL);
488        }
489        if (endBB) {
490            if (!ignoreInstr) {
491                /*
492                 * Create a new BB if one doesn't already exist.
493                 */
494                VfyBasicBlock* curBlock = vdata->basicBlocks[blockStartAddr];
495                if (curBlock == NULL) {
496                    curBlock = allocVfyBasicBlock(vdata, blockStartAddr);
497                    if (curBlock == NULL)
498                        return false;
499                    vdata->basicBlocks[blockStartAddr] = curBlock;
500                }
501
502                curBlock->firstAddr = blockStartAddr;
503                curBlock->lastAddr = idx;
504
505                if (!setPredecessors(vdata, curBlock, idx, opFlags, nextIdx,
506                        handlerList, numHandlers))
507                {
508                    goto bail;
509                }
510            }
511
512            blockStartAddr = nextIdx;
513            debugBBIndex++;
514        }
515
516        idx = nextIdx;
517    }
518
519    assert(idx == insnsSize);
520
521    result = true;
522
523    if (verbose)
524        dumpBasicBlocks(vdata);
525
526bail:
527    free(handlerListAlloc);
528    return result;
529}
530
531/*
532 * Free the storage used by basic blocks.
533 */
534void dvmFreeVfyBasicBlocks(VerifierData* vdata)
535{
536    unsigned int idx;
537
538    if (vdata->basicBlocks == NULL)
539        return;
540
541    for (idx = 0; idx < vdata->insnsSize; idx++) {
542        VfyBasicBlock* block = vdata->basicBlocks[idx];
543        if (block == NULL)
544            continue;
545
546        dvmPointerSetFree(block->predecessors);
547        free(block);
548    }
549}
550