1/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * Dalvik bytecode verifier.
19 */
20#ifndef DALVIK_CODEVERIFY_H_
21#define DALVIK_CODEVERIFY_H_
22
23#include "analysis/VerifySubs.h"
24#include "analysis/VfyBasicBlock.h"
25
26/*
27 * Enumeration for register type values.  The "hi" piece of a 64-bit value
28 * MUST immediately follow the "lo" piece in the enumeration, so we can check
29 * that hi==lo+1.
30 *
31 * Assignment of constants:
32 *   [-MAXINT,-32768)   : integer
33 *   [-32768,-128)      : short
34 *   [-128,0)           : byte
35 *   0                  : zero
36 *   1                  : one
37 *   [2,128)            : posbyte
38 *   [128,32768)        : posshort
39 *   [32768,65536)      : char
40 *   [65536,MAXINT]     : integer
41 *
42 * Allowed "implicit" widening conversions:
43 *   zero -> boolean, posbyte, byte, posshort, short, char, integer, ref (null)
44 *   one -> boolean, posbyte, byte, posshort, short, char, integer
45 *   boolean -> posbyte, byte, posshort, short, char, integer
46 *   posbyte -> posshort, short, integer, char
47 *   byte -> short, integer
48 *   posshort -> integer, char
49 *   short -> integer
50 *   char -> integer
51 *
52 * In addition, all of the above can convert to "float".
53 *
54 * We're more careful with integer values than the spec requires.  The
55 * motivation is to restrict byte/char/short to the correct range of values.
56 * For example, if a method takes a byte argument, we don't want to allow
57 * the code to load the constant "1024" and pass it in.
58 */
59enum {
60    kRegTypeUnknown = 0,    /* initial state; use value=0 so calloc works */
61    kRegTypeUninit = 1,     /* MUST be odd to distinguish from pointer */
62    kRegTypeConflict,       /* merge clash makes this reg's type unknowable */
63
64    /*
65     * Category-1nr types.  The order of these is chiseled into a couple
66     * of tables, so don't add, remove, or reorder if you can avoid it.
67     */
68#define kRegType1nrSTART    kRegTypeZero
69    kRegTypeZero,           /* 32-bit 0, could be Boolean, Int, Float, or Ref */
70    kRegTypeOne,            /* 32-bit 1, could be Boolean, Int, Float */
71    kRegTypeBoolean,        /* must be 0 or 1 */
72    kRegTypeConstPosByte,   /* const derived byte, known positive */
73    kRegTypeConstByte,      /* const derived byte */
74    kRegTypeConstPosShort,  /* const derived short, known positive */
75    kRegTypeConstShort,     /* const derived short */
76    kRegTypeConstChar,      /* const derived char */
77    kRegTypeConstInteger,   /* const derived integer */
78    kRegTypePosByte,        /* byte, known positive (can become char) */
79    kRegTypeByte,
80    kRegTypePosShort,       /* short, known positive (can become char) */
81    kRegTypeShort,
82    kRegTypeChar,
83    kRegTypeInteger,
84    kRegTypeFloat,
85#define kRegType1nrEND      kRegTypeFloat
86
87    kRegTypeConstLo,        /* const derived wide, lower half */
88    kRegTypeConstHi,        /* const derived wide, upper half */
89    kRegTypeLongLo,         /* lower-numbered register; endian-independent */
90    kRegTypeLongHi,
91    kRegTypeDoubleLo,
92    kRegTypeDoubleHi,
93
94    /*
95     * Enumeration max; this is used with "full" (32-bit) RegType values.
96     *
97     * Anything larger than this is a ClassObject or uninit ref.  Mask off
98     * all but the low 8 bits; if you're left with kRegTypeUninit, pull
99     * the uninit index out of the high 24.  Because kRegTypeUninit has an
100     * odd value, there is no risk of a particular ClassObject pointer bit
101     * pattern being confused for it (assuming our class object allocator
102     * uses word alignment).
103     */
104    kRegTypeMAX
105};
106#define kRegTypeUninitMask  0xff
107#define kRegTypeUninitShift 8
108
109/*
110 * RegType holds information about the type of data held in a register.
111 * For most types it's a simple enum.  For reference types it holds a
112 * pointer to the ClassObject, and for uninitialized references it holds
113 * an index into the UninitInstanceMap.
114 */
115typedef u4 RegType;
116
117/*
118 * A bit vector indicating which entries in the monitor stack are
119 * associated with this register.  The low bit corresponds to the stack's
120 * bottom-most entry.
121 */
122typedef u4 MonitorEntries;
123#define kMaxMonitorStackDepth   (sizeof(MonitorEntries) * 8)
124
125/*
126 * During verification, we associate one of these with every "interesting"
127 * instruction.  We track the status of all registers, and (if the method
128 * has any monitor-enter instructions) maintain a stack of entered monitors
129 * (identified by code unit offset).
130 *
131 * If live-precise register maps are enabled, the "liveRegs" vector will
132 * be populated.  Unlike the other lists of registers here, we do not
133 * track the liveness of the method result register (which is not visible
134 * to the GC).
135 */
136struct RegisterLine {
137    RegType*        regTypes;
138    MonitorEntries* monitorEntries;
139    u4*             monitorStack;
140    unsigned int    monitorStackTop;
141    BitVector*      liveRegs;
142};
143
144/*
145 * Table that maps uninitialized instances to classes, based on the
146 * address of the new-instance instruction.  One per method.
147 */
148struct UninitInstanceMap {
149    int numEntries;
150    struct {
151        int             addr;   /* code offset, or -1 for method arg ("this") */
152        ClassObject*    clazz;  /* class created at this address */
153    } map[1];
154};
155#define kUninitThisArgAddr  (-1)
156#define kUninitThisArgSlot  0
157
158/*
159 * Various bits of data used by the verifier and register map generator.
160 */
161struct VerifierData {
162    /*
163     * The method we're working on.
164     */
165    const Method*   method;
166
167    /*
168     * Number of code units of instructions in the method.  A cache of the
169     * value calculated by dvmGetMethodInsnsSize().
170     */
171    u4              insnsSize;
172
173    /*
174     * Number of registers we track for each instruction.  This is equal
175     * to the method's declared "registersSize".  (Does not include the
176     * pending return value.)
177     */
178    u4              insnRegCount;
179
180    /*
181     * Instruction widths and flags, one entry per code unit.
182     */
183    InsnFlags*      insnFlags;
184
185    /*
186     * Uninitialized instance map, used for tracking the movement of
187     * objects that have been allocated but not initialized.
188     */
189    UninitInstanceMap* uninitMap;
190
191    /*
192     * Array of RegisterLine structs, one entry per code unit.  We only need
193     * entries for code units that hold the start of an "interesting"
194     * instruction.  For register map generation, we're only interested
195     * in GC points.
196     */
197    RegisterLine*   registerLines;
198
199    /*
200     * The number of occurrences of specific opcodes.
201     */
202    size_t          newInstanceCount;
203    size_t          monitorEnterCount;
204
205    /*
206     * Array of pointers to basic blocks, one entry per code unit.  Used
207     * for liveness analysis.
208     */
209    VfyBasicBlock** basicBlocks;
210};
211
212
213/* table with static merge logic for primitive types */
214extern const char gDvmMergeTab[kRegTypeMAX][kRegTypeMAX];
215
216
217/*
218 * Returns "true" if the flags indicate that this address holds the start
219 * of an instruction.
220 */
221INLINE bool dvmInsnIsOpcode(const InsnFlags* insnFlags, int addr) {
222    return (insnFlags[addr] & kInsnFlagWidthMask) != 0;
223}
224
225/*
226 * Extract the unsigned 16-bit instruction width from "flags".
227 */
228INLINE int dvmInsnGetWidth(const InsnFlags* insnFlags, int addr) {
229    return insnFlags[addr] & kInsnFlagWidthMask;
230}
231
232/*
233 * Changed?
234 */
235INLINE bool dvmInsnIsChanged(const InsnFlags* insnFlags, int addr) {
236    return (insnFlags[addr] & kInsnFlagChanged) != 0;
237}
238INLINE void dvmInsnSetChanged(InsnFlags* insnFlags, int addr, bool changed)
239{
240    if (changed)
241        insnFlags[addr] |= kInsnFlagChanged;
242    else
243        insnFlags[addr] &= ~kInsnFlagChanged;
244}
245
246/*
247 * Visited?
248 */
249INLINE bool dvmInsnIsVisited(const InsnFlags* insnFlags, int addr) {
250    return (insnFlags[addr] & kInsnFlagVisited) != 0;
251}
252INLINE void dvmInsnSetVisited(InsnFlags* insnFlags, int addr, bool changed)
253{
254    if (changed)
255        insnFlags[addr] |= kInsnFlagVisited;
256    else
257        insnFlags[addr] &= ~kInsnFlagVisited;
258}
259
260/*
261 * Visited or changed?
262 */
263INLINE bool dvmInsnIsVisitedOrChanged(const InsnFlags* insnFlags, int addr) {
264    return (insnFlags[addr] & (kInsnFlagVisited|kInsnFlagChanged)) != 0;
265}
266
267/*
268 * In a "try" block?
269 */
270INLINE bool dvmInsnIsInTry(const InsnFlags* insnFlags, int addr) {
271    return (insnFlags[addr] & kInsnFlagInTry) != 0;
272}
273INLINE void dvmInsnSetInTry(InsnFlags* insnFlags, int addr, bool inTry)
274{
275    assert(inTry);
276    //if (inTry)
277        insnFlags[addr] |= kInsnFlagInTry;
278    //else
279    //    insnFlags[addr] &= ~kInsnFlagInTry;
280}
281
282/*
283 * Instruction is a branch target or exception handler?
284 */
285INLINE bool dvmInsnIsBranchTarget(const InsnFlags* insnFlags, int addr) {
286    return (insnFlags[addr] & kInsnFlagBranchTarget) != 0;
287}
288INLINE void dvmInsnSetBranchTarget(InsnFlags* insnFlags, int addr,
289    bool isBranch)
290{
291    assert(isBranch);
292    //if (isBranch)
293        insnFlags[addr] |= kInsnFlagBranchTarget;
294    //else
295    //    insnFlags[addr] &= ~kInsnFlagBranchTarget;
296}
297
298/*
299 * Instruction is a GC point?
300 */
301INLINE bool dvmInsnIsGcPoint(const InsnFlags* insnFlags, int addr) {
302    return (insnFlags[addr] & kInsnFlagGcPoint) != 0;
303}
304INLINE void dvmInsnSetGcPoint(InsnFlags* insnFlags, int addr,
305    bool isGcPoint)
306{
307    assert(isGcPoint);
308    //if (isGcPoint)
309        insnFlags[addr] |= kInsnFlagGcPoint;
310    //else
311    //    insnFlags[addr] &= ~kInsnFlagGcPoint;
312}
313
314
315/*
316 * Create a new UninitInstanceMap.
317 */
318UninitInstanceMap* dvmCreateUninitInstanceMap(const Method* meth,
319    const InsnFlags* insnFlags, int newInstanceCount);
320
321/*
322 * Release the storage associated with an UninitInstanceMap.
323 */
324void dvmFreeUninitInstanceMap(UninitInstanceMap* uninitMap);
325
326/*
327 * Verify bytecode in "meth".  "insnFlags" should be populated with
328 * instruction widths and "in try" flags.
329 */
330bool dvmVerifyCodeFlow(VerifierData* vdata);
331
332#endif  // DALVIK_CODEVERIFY_H_
333