CodeVerify.h revision 701d2720fa693621a3c0c4d0bdf9e32e3eb8e731
1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Dalvik bytecode verifier.
19 */
20#ifndef _DALVIK_CODEVERIFY
21#define _DALVIK_CODEVERIFY
22
23#include "analysis/VerifySubs.h"
24#include "analysis/VfyBasicBlock.h"
25
26
27/*
28 * Enumeration for register type values.  The "hi" piece of a 64-bit value
29 * MUST immediately follow the "lo" piece in the enumeration, so we can check
30 * that hi==lo+1.
31 *
32 * Assignment of constants:
33 *   [-MAXINT,-32768)   : integer
34 *   [-32768,-128)      : short
35 *   [-128,0)           : byte
36 *   0                  : zero
37 *   1                  : one
38 *   [2,128)            : posbyte
39 *   [128,32768)        : posshort
40 *   [32768,65536)      : char
41 *   [65536,MAXINT]     : integer
42 *
43 * Allowed "implicit" widening conversions:
44 *   zero -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
45 *   one -> boolean, posbyte, byte, posshort, short, char, integer
46 *   boolean -> posbyte, byte, posshort, short, char, integer
47 *   posbyte -> posshort, short, integer, char
48 *   byte -> short, integer
49 *   posshort -> integer, char
50 *   short -> integer
51 *   char -> integer
52 *
53 * In addition, all of the above can convert to "float".
54 *
55 * We're more careful with integer values than the spec requires.  The
56 * motivation is to restrict byte/char/short to the correct range of values.
57 * For example, if a method takes a byte argument, we don't want to allow
58 * the code to load the constant "1024" and pass it in.
59 */
enum {
    kRegTypeUnknown = 0,    /* initial state; use value=0 so calloc works */
    kRegTypeUninit = 1,     /* MUST be odd to distinguish from pointer */
    kRegTypeConflict,       /* merge clash makes this reg's type unknowable */

    /*
     * Category-1nr types.  The order of these is chiseled into a couple
     * of tables, so don't add, remove, or reorder if you can avoid it.
     * kRegType1nrSTART and kRegType1nrEND alias the first and last
     * entries of this group.
     */
#define kRegType1nrSTART    kRegTypeFloat
    kRegTypeFloat,
    kRegTypeZero,           /* 32-bit 0, could be Boolean, Int, Float, or Ref */
    kRegTypeOne,            /* 32-bit 1, could be Boolean, Int, Float */
    kRegTypeBoolean,        /* must be 0 or 1 */
    kRegTypePosByte,        /* byte, known positive (can become char) */
    kRegTypeByte,
    kRegTypePosShort,       /* short, known positive (can become char) */
    kRegTypeShort,
    kRegTypeChar,
    kRegTypeInteger,
#define kRegType1nrEND      kRegTypeInteger

    /*
     * 64-bit values.  Each "Hi" entry must directly follow its "Lo" entry
     * so that a register pair can be checked with hi==lo+1.
     */
    kRegTypeLongLo,         /* lower-numbered register; endian-independent */
    kRegTypeLongHi,
    kRegTypeDoubleLo,
    kRegTypeDoubleHi,

    /*
     * Enumeration max; this is used with "full" (32-bit) RegType values.
     *
     * Anything larger than this is a ClassObject or uninit ref.  Mask off
     * all but the low 8 bits; if you're left with kRegTypeUninit, pull
     * the uninit index out of the high 24.  Because kRegTypeUninit has an
     * odd value, there is no risk of a particular ClassObject pointer bit
     * pattern being confused for it (assuming our class object allocator
     * uses word alignment).
     */
    kRegTypeMAX
};
/* Selects the low 8 bits of a full RegType (compare with kRegTypeUninit). */
#define kRegTypeUninitMask  0xff
/* Bit position at which the uninit index (the high 24 bits) starts. */
#define kRegTypeUninitShift 8
101
/*
 * RegType holds information about the type of data held in a register.
 * For most types it's a simple enum value (one of the kRegType* constants).
 * For reference types it holds a pointer to the ClassObject, and for
 * uninitialized references it holds an index into the UninitInstanceMap.
 *
 * NOTE(review): keeping a ClassObject pointer in a u4 presumably relies on
 * pointers fitting in 32 bits -- confirm before building for other targets.
 */
typedef u4 RegType;
109
/*
 * A bit vector indicating which entries in the monitor stack are
 * associated with this register.  The low bit corresponds to the stack's
 * bottom-most entry.
 */
typedef u4 MonitorEntries;
/*
 * Number of bits in a MonitorEntries word (computed assuming 8-bit bytes).
 * With one bit per stack entry, this is the deepest monitor stack a
 * MonitorEntries value can describe.
 */
#define kMaxMonitorStackDepth   (sizeof(MonitorEntries) * 8)
117
/*
 * During verification, we associate one of these with every "interesting"
 * instruction.  We track the status of all registers, and (if the method
 * has any monitor-enter instructions) maintain a stack of entered monitors
 * (identified by code unit offset).
 */
typedef struct {
    /* register type state; presumably one RegType per tracked register */
    RegType*        regTypes;
    /* monitor-stack bit vectors; presumably one per register -- confirm */
    MonitorEntries* monitorEntries;
    /* stack of entered monitors, each identified by code unit offset */
    u4*             monitorStack;
    /* NOTE(review): presumably the count of entries in use -- confirm */
    unsigned int    monitorStackTop;
} RegisterLine;
130
/*
 * Table that maps uninitialized instances to classes, based on the
 * address of the new-instance instruction.  One per method.
 *
 * NOTE(review): "map" is declared with a single element; presumably the
 * struct is over-allocated so that it holds numEntries entries -- confirm
 * against dvmCreateUninitInstanceMap().
 */
typedef struct UninitInstanceMap {
    int numEntries;                 /* number of entries in "map" */
    struct {
        int             addr;   /* code offset, or -1 for method arg ("this") */
        ClassObject*    clazz;  /* class created at this address */
    } map[1];
} UninitInstanceMap;
/* "addr" value used for the method argument ("this") entry */
#define kUninitThisArgAddr  (-1)
/* NOTE(review): presumably the "map" index reserved for "this" -- confirm */
#define kUninitThisArgSlot  0
144
/*
 * Various bits of data used by the verifier and register map generator.
 * One instance describes a single method (see the "method" field).
 */
typedef struct VerifierData {
    /*
     * The method we're working on.
     */
    const Method*   method;

    /*
     * Number of code units of instructions in the method.  A cache of the
     * value calculated by dvmGetMethodInsnsSize().
     */
    u4              insnsSize;

    /*
     * Number of registers we track for each instruction.  This is equal
     * to the method's declared "registersSize".  (Does not include the
     * pending return value.)
     */
    u4              insnRegCount;

    /*
     * Instruction widths and flags, one entry per code unit.
     */
    InsnFlags*      insnFlags;

    /*
     * Uninitialized instance map, used for tracking the movement of
     * objects that have been allocated but not initialized.
     */
    UninitInstanceMap* uninitMap;

    /*
     * Array of RegisterLine structs, one entry per code unit.  We only need
     * entries for code units that hold the start of an "interesting"
     * instruction.  For register map generation, we're only interested
     * in GC points.
     */
    RegisterLine*   registerLines;

    /*
     * The number of occurrences of specific opcodes.
     * NOTE(review): going by the field names, presumably new-instance and
     * monitor-enter instructions respectively -- confirm.
     */
    size_t          newInstanceCount;
    size_t          monitorEnterCount;

    /*
     * Array of pointers to basic blocks, one entry per code unit.  Used
     * for liveness analysis.
     */
    VfyBasicBlock** basicBlocks;
} VerifierData;
198
199
/*
 * Table with static merge logic for primitive types.  Both dimensions are
 * sized by kRegTypeMAX, so it is indexed by two register type enum values.
 * NOTE(review): each entry is presumably the type produced by merging the
 * two index types -- confirm against the table's definition.
 */
extern const char gDvmMergeTab[kRegTypeMAX][kRegTypeMAX];
202
203
204/*
205 * Returns "true" if the flags indicate that this address holds the start
206 * of an instruction.
207 */
208INLINE bool dvmInsnIsOpcode(const InsnFlags* insnFlags, int addr) {
209    return (insnFlags[addr] & kInsnFlagWidthMask) != 0;
210}
211
212/*
213 * Extract the unsigned 16-bit instruction width from "flags".
214 */
215INLINE int dvmInsnGetWidth(const InsnFlags* insnFlags, int addr) {
216    return insnFlags[addr] & kInsnFlagWidthMask;
217}
218
219/*
220 * Changed?
221 */
222INLINE bool dvmInsnIsChanged(const InsnFlags* insnFlags, int addr) {
223    return (insnFlags[addr] & kInsnFlagChanged) != 0;
224}
225INLINE void dvmInsnSetChanged(InsnFlags* insnFlags, int addr, bool changed)
226{
227    if (changed)
228        insnFlags[addr] |= kInsnFlagChanged;
229    else
230        insnFlags[addr] &= ~kInsnFlagChanged;
231}
232
233/*
234 * Visited?
235 */
236INLINE bool dvmInsnIsVisited(const InsnFlags* insnFlags, int addr) {
237    return (insnFlags[addr] & kInsnFlagVisited) != 0;
238}
239INLINE void dvmInsnSetVisited(InsnFlags* insnFlags, int addr, bool changed)
240{
241    if (changed)
242        insnFlags[addr] |= kInsnFlagVisited;
243    else
244        insnFlags[addr] &= ~kInsnFlagVisited;
245}
246
247/*
248 * Visited or changed?
249 */
250INLINE bool dvmInsnIsVisitedOrChanged(const InsnFlags* insnFlags, int addr) {
251    return (insnFlags[addr] & (kInsnFlagVisited|kInsnFlagChanged)) != 0;
252}
253
254/*
255 * In a "try" block?
256 */
257INLINE bool dvmInsnIsInTry(const InsnFlags* insnFlags, int addr) {
258    return (insnFlags[addr] & kInsnFlagInTry) != 0;
259}
260INLINE void dvmInsnSetInTry(InsnFlags* insnFlags, int addr, bool inTry)
261{
262    assert(inTry);
263    //if (inTry)
264        insnFlags[addr] |= kInsnFlagInTry;
265    //else
266    //    insnFlags[addr] &= ~kInsnFlagInTry;
267}
268
269/*
270 * Instruction is a branch target or exception handler?
271 */
272INLINE bool dvmInsnIsBranchTarget(const InsnFlags* insnFlags, int addr) {
273    return (insnFlags[addr] & kInsnFlagBranchTarget) != 0;
274}
275INLINE void dvmInsnSetBranchTarget(InsnFlags* insnFlags, int addr,
276    bool isBranch)
277{
278    assert(isBranch);
279    //if (isBranch)
280        insnFlags[addr] |= kInsnFlagBranchTarget;
281    //else
282    //    insnFlags[addr] &= ~kInsnFlagBranchTarget;
283}
284
285/*
286 * Instruction is a GC point?
287 */
288INLINE bool dvmInsnIsGcPoint(const InsnFlags* insnFlags, int addr) {
289    return (insnFlags[addr] & kInsnFlagGcPoint) != 0;
290}
291INLINE void dvmInsnSetGcPoint(InsnFlags* insnFlags, int addr,
292    bool isGcPoint)
293{
294    assert(isGcPoint);
295    //if (isGcPoint)
296        insnFlags[addr] |= kInsnFlagGcPoint;
297    //else
298    //    insnFlags[addr] &= ~kInsnFlagGcPoint;
299}
300
301
/*
 * Create a new UninitInstanceMap for the method "meth".
 *
 * "insnFlags" holds the per-code-unit instruction flags for the method.
 * NOTE(review): "newInstanceCount" is presumably the number of
 * new-instance instructions in the method, and the returned map is
 * presumably released with dvmFreeUninitInstanceMap() -- confirm against
 * the implementation, including what is returned on failure.
 */
UninitInstanceMap* dvmCreateUninitInstanceMap(const Method* meth,
    const InsnFlags* insnFlags, int newInstanceCount);
307
/*
 * Release the storage associated with an UninitInstanceMap.
 * NOTE(review): whether a NULL "uninitMap" is accepted is not visible
 * from this header -- confirm against the implementation.
 */
void dvmFreeUninitInstanceMap(UninitInstanceMap* uninitMap);
312
/*
 * Verify the bytecode of the method described by "vdata" (vdata->method).
 * vdata->insnFlags should be populated with instruction widths and
 * "in try" flags.
 *
 * NOTE(review): the bool result presumably means "true" on successful
 * verification -- confirm against the implementation.
 */
bool dvmVerifyCodeFlow(VerifierData* vdata);
318
319#endif /*_DALVIK_CODEVERIFY*/
320